11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds ** Write ahead logging implementation copyright Chris Mason 2000 31da177e4SLinus Torvalds ** 425985edcSLucas De Marchi ** The background commits make this code very interrelated, and 51da177e4SLinus Torvalds ** overly complex. I need to rethink things a bit....The major players: 61da177e4SLinus Torvalds ** 71da177e4SLinus Torvalds ** journal_begin -- call with the number of blocks you expect to log. 81da177e4SLinus Torvalds ** If the current transaction is too 91da177e4SLinus Torvalds ** old, it will block until the current transaction is 101da177e4SLinus Torvalds ** finished, and then start a new one. 111da177e4SLinus Torvalds ** Usually, your transaction will get joined in with 121da177e4SLinus Torvalds ** previous ones for speed. 131da177e4SLinus Torvalds ** 141da177e4SLinus Torvalds ** journal_join -- same as journal_begin, but won't block on the current 151da177e4SLinus Torvalds ** transaction regardless of age. Don't ever call 161da177e4SLinus Torvalds ** this. Ever. There are only two places it should be 171da177e4SLinus Torvalds ** called from, and they are both inside this file. 181da177e4SLinus Torvalds ** 191da177e4SLinus Torvalds ** journal_mark_dirty -- adds blocks into this transaction. clears any flags 201da177e4SLinus Torvalds ** that might make them get sent to disk 211da177e4SLinus Torvalds ** and then marks them BH_JDirty. Puts the buffer head 221da177e4SLinus Torvalds ** into the current transaction hash. 231da177e4SLinus Torvalds ** 241da177e4SLinus Torvalds ** journal_end -- if the current transaction is batchable, it does nothing 251da177e4SLinus Torvalds ** otherwise, it could do an async/synchronous commit, or 261da177e4SLinus Torvalds ** a full flush of all log and real blocks in the 271da177e4SLinus Torvalds ** transaction. 
281da177e4SLinus Torvalds ** 291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and 301da177e4SLinus Torvalds ** commit blocks are sent to disk. Forces commit blocks 311da177e4SLinus Torvalds ** to disk for all backgrounded commits that have been 321da177e4SLinus Torvalds ** around too long. 331da177e4SLinus Torvalds ** -- Note, if you call this as an immediate flush from 341da177e4SLinus Torvalds ** from within kupdate, it will ignore the immediate flag 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include <linux/time.h> 386188e10dSMatthew Wilcox #include <linux/semaphore.h> 391da177e4SLinus Torvalds #include <linux/vmalloc.h> 40f466c6fdSAl Viro #include "reiserfs.h" 411da177e4SLinus Torvalds #include <linux/kernel.h> 421da177e4SLinus Torvalds #include <linux/errno.h> 431da177e4SLinus Torvalds #include <linux/fcntl.h> 441da177e4SLinus Torvalds #include <linux/stat.h> 451da177e4SLinus Torvalds #include <linux/string.h> 461da177e4SLinus Torvalds #include <linux/buffer_head.h> 471da177e4SLinus Torvalds #include <linux/workqueue.h> 481da177e4SLinus Torvalds #include <linux/writeback.h> 491da177e4SLinus Torvalds #include <linux/blkdev.h> 503fcfab16SAndrew Morton #include <linux/backing-dev.h> 5190415deaSJeff Mahoney #include <linux/uaccess.h> 525a0e3ad6STejun Heo #include <linux/slab.h> 5390415deaSJeff Mahoney 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds /* gets a struct reiserfs_journal_list * from a list head */ 561da177e4SLinus Torvalds #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 571da177e4SLinus Torvalds j_list)) 581da177e4SLinus Torvalds #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 591da177e4SLinus Torvalds j_working_list)) 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds /* the number of mounted filesystems. 
This is used to decide when to 621da177e4SLinus Torvalds ** start and kill the commit workqueue 631da177e4SLinus Torvalds */ 641da177e4SLinus Torvalds static int reiserfs_mounted_fs_count; 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds static struct workqueue_struct *commit_wq; 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 691da177e4SLinus Torvalds structs at 4k */ 701da177e4SLinus Torvalds #define BUFNR 64 /*read ahead */ 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds /* cnode stat bits. Move these into reiserfs_fs.h */ 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 751da177e4SLinus Torvalds #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 761da177e4SLinus Torvalds 771da177e4SLinus Torvalds #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 781da177e4SLinus Torvalds #define BLOCK_DIRTIED 5 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds /* journal list state bits */ 811da177e4SLinus Torvalds #define LIST_TOUCHED 1 821da177e4SLinus Torvalds #define LIST_DIRTY 2 831da177e4SLinus Torvalds #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds /* flags for do_journal_end */ 861da177e4SLinus Torvalds #define FLUSH_ALL 1 /* flush commit and real blocks */ 871da177e4SLinus Torvalds #define COMMIT_NOW 2 /* end and commit this transaction */ 881da177e4SLinus Torvalds #define WAIT 4 /* wait for the log blocks to hit the disk */ 891da177e4SLinus Torvalds 90bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *, 91bd4c625cSLinus Torvalds struct super_block *, unsigned long nblocks, 92bd4c625cSLinus Torvalds int flags); 93bd4c625cSLinus Torvalds static int flush_journal_list(struct super_block *s, 94bd4c625cSLinus Torvalds struct 
reiserfs_journal_list *jl, int flushall); 95bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s, 96bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall); 971da177e4SLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn); 98bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 99a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks); 1004385bab1SAl Viro static void release_journal_dev(struct super_block *super, 1011da177e4SLinus Torvalds struct reiserfs_journal *journal); 1021da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 1031da177e4SLinus Torvalds struct reiserfs_journal_list *jl); 104c4028958SDavid Howells static void flush_async_commits(struct work_struct *work); 1051da177e4SLinus Torvalds static void queue_log_writer(struct super_block *s); 1061da177e4SLinus Torvalds 1071da177e4SLinus Torvalds /* values for join in do_journal_begin_r */ 1081da177e4SLinus Torvalds enum { 1091da177e4SLinus Torvalds JBEGIN_REG = 0, /* regular journal begin */ 1101da177e4SLinus Torvalds JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 1111da177e4SLinus Torvalds JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 1121da177e4SLinus Torvalds }; 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 115a9dd3643SJeff Mahoney struct super_block *sb, 1161da177e4SLinus Torvalds unsigned long nblocks, int join); 1171da177e4SLinus Torvalds 118a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb) 119bd4c625cSLinus Torvalds { 120a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 121bd4c625cSLinus Torvalds memset(journal->j_hash_table, 0, 122bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 1231da177e4SLinus Torvalds } 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds /* 
1261da177e4SLinus Torvalds ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 1271da177e4SLinus Torvalds ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 1281da177e4SLinus Torvalds ** more details. 1291da177e4SLinus Torvalds */ 130bd4c625cSLinus Torvalds static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 131bd4c625cSLinus Torvalds { 1321da177e4SLinus Torvalds if (bh) { 1331da177e4SLinus Torvalds clear_buffer_dirty(bh); 1341da177e4SLinus Torvalds clear_buffer_journal_test(bh); 1351da177e4SLinus Torvalds } 1361da177e4SLinus Torvalds return 0; 1371da177e4SLinus Torvalds } 1381da177e4SLinus Torvalds 139bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block 140a9dd3643SJeff Mahoney *sb) 141bd4c625cSLinus Torvalds { 1421da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 1431da177e4SLinus Torvalds static int id; 1441da177e4SLinus Torvalds 145d739b42bSPekka Enberg bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); 1461da177e4SLinus Torvalds if (!bn) { 1471da177e4SLinus Torvalds return NULL; 1481da177e4SLinus Torvalds } 149a9dd3643SJeff Mahoney bn->data = kzalloc(sb->s_blocksize, GFP_NOFS); 1501da177e4SLinus Torvalds if (!bn->data) { 151d739b42bSPekka Enberg kfree(bn); 1521da177e4SLinus Torvalds return NULL; 1531da177e4SLinus Torvalds } 1541da177e4SLinus Torvalds bn->id = id++; 1551da177e4SLinus Torvalds INIT_LIST_HEAD(&bn->list); 1561da177e4SLinus Torvalds return bn; 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds 159a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) 160bd4c625cSLinus Torvalds { 161a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1621da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 1631da177e4SLinus Torvalds struct list_head *entry = journal->j_bitmap_nodes.next; 
1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds journal->j_used_bitmap_nodes++; 1661da177e4SLinus Torvalds repeat: 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds if (entry != &journal->j_bitmap_nodes) { 1691da177e4SLinus Torvalds bn = list_entry(entry, struct reiserfs_bitmap_node, list); 1701da177e4SLinus Torvalds list_del(entry); 171a9dd3643SJeff Mahoney memset(bn->data, 0, sb->s_blocksize); 1721da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 1731da177e4SLinus Torvalds return bn; 1741da177e4SLinus Torvalds } 175a9dd3643SJeff Mahoney bn = allocate_bitmap_node(sb); 1761da177e4SLinus Torvalds if (!bn) { 1771da177e4SLinus Torvalds yield(); 1781da177e4SLinus Torvalds goto repeat; 1791da177e4SLinus Torvalds } 1801da177e4SLinus Torvalds return bn; 1811da177e4SLinus Torvalds } 182a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb, 183bd4c625cSLinus Torvalds struct reiserfs_bitmap_node *bn) 184bd4c625cSLinus Torvalds { 185a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1861da177e4SLinus Torvalds journal->j_used_bitmap_nodes--; 1871da177e4SLinus Torvalds if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 188d739b42bSPekka Enberg kfree(bn->data); 189d739b42bSPekka Enberg kfree(bn); 1901da177e4SLinus Torvalds } else { 1911da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 1921da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 1931da177e4SLinus Torvalds } 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds 196a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb) 197bd4c625cSLinus Torvalds { 1981da177e4SLinus Torvalds int i; 199a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 2001da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 2011da177e4SLinus Torvalds for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { 202a9dd3643SJeff Mahoney bn = allocate_bitmap_node(sb); 2031da177e4SLinus Torvalds if (bn) { 
2041da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 2051da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 2061da177e4SLinus Torvalds } else { 2070222e657SJeff Mahoney break; /* this is ok, we'll try again when more are needed */ 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds } 2101da177e4SLinus Torvalds } 2111da177e4SLinus Torvalds 212a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb, 2133ee16670SJeff Mahoney b_blocknr_t block, 214bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 215bd4c625cSLinus Torvalds { 216a9dd3643SJeff Mahoney unsigned int bmap_nr = block / (sb->s_blocksize << 3); 217a9dd3643SJeff Mahoney unsigned int bit_nr = block % (sb->s_blocksize << 3); 2181da177e4SLinus Torvalds 2191da177e4SLinus Torvalds if (!jb->bitmaps[bmap_nr]) { 220a9dd3643SJeff Mahoney jb->bitmaps[bmap_nr] = get_bitmap_node(sb); 2211da177e4SLinus Torvalds } 2221da177e4SLinus Torvalds set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); 2231da177e4SLinus Torvalds return 0; 2241da177e4SLinus Torvalds } 2251da177e4SLinus Torvalds 226a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb, 227bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 228bd4c625cSLinus Torvalds { 2291da177e4SLinus Torvalds int i; 2301da177e4SLinus Torvalds if (jb->bitmaps == NULL) 2311da177e4SLinus Torvalds return; 2321da177e4SLinus Torvalds 233a9dd3643SJeff Mahoney for (i = 0; i < reiserfs_bmap_count(sb); i++) { 2341da177e4SLinus Torvalds if (jb->bitmaps[i]) { 235a9dd3643SJeff Mahoney free_bitmap_node(sb, jb->bitmaps[i]); 2361da177e4SLinus Torvalds jb->bitmaps[i] = NULL; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds } 2391da177e4SLinus Torvalds } 2401da177e4SLinus Torvalds 2411da177e4SLinus Torvalds /* 2421da177e4SLinus Torvalds ** only call this on FS unmount. 
2431da177e4SLinus Torvalds */ 244a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb, 245bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb_array) 246bd4c625cSLinus Torvalds { 2471da177e4SLinus Torvalds int i; 2481da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2491da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2501da177e4SLinus Torvalds jb = jb_array + i; 2511da177e4SLinus Torvalds jb->journal_list = NULL; 252a9dd3643SJeff Mahoney cleanup_bitmap_list(sb, jb); 2531da177e4SLinus Torvalds vfree(jb->bitmaps); 2541da177e4SLinus Torvalds jb->bitmaps = NULL; 2551da177e4SLinus Torvalds } 2561da177e4SLinus Torvalds return 0; 2571da177e4SLinus Torvalds } 2581da177e4SLinus Torvalds 259a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb) 260bd4c625cSLinus Torvalds { 261a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 2621da177e4SLinus Torvalds struct list_head *next = journal->j_bitmap_nodes.next; 2631da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds while (next != &journal->j_bitmap_nodes) { 2661da177e4SLinus Torvalds bn = list_entry(next, struct reiserfs_bitmap_node, list); 2671da177e4SLinus Torvalds list_del(next); 268d739b42bSPekka Enberg kfree(bn->data); 269d739b42bSPekka Enberg kfree(bn); 2701da177e4SLinus Torvalds next = journal->j_bitmap_nodes.next; 2711da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds 2741da177e4SLinus Torvalds return 0; 2751da177e4SLinus Torvalds } 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* 2781da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 2791da177e4SLinus Torvalds ** jb_array is the array to be filled in. 
2801da177e4SLinus Torvalds */ 281a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb, 2821da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb_array, 2833ee16670SJeff Mahoney unsigned int bmap_nr) 284bd4c625cSLinus Torvalds { 2851da177e4SLinus Torvalds int i; 2861da177e4SLinus Torvalds int failed = 0; 2871da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2881da177e4SLinus Torvalds int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); 2891da177e4SLinus Torvalds 2901da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2911da177e4SLinus Torvalds jb = jb_array + i; 2921da177e4SLinus Torvalds jb->journal_list = NULL; 293558feb08SJoe Perches jb->bitmaps = vzalloc(mem); 2941da177e4SLinus Torvalds if (!jb->bitmaps) { 295a9dd3643SJeff Mahoney reiserfs_warning(sb, "clm-2000", "unable to " 29645b03d5eSJeff Mahoney "allocate bitmaps for journal lists"); 2971da177e4SLinus Torvalds failed = 1; 2981da177e4SLinus Torvalds break; 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds } 3011da177e4SLinus Torvalds if (failed) { 302a9dd3643SJeff Mahoney free_list_bitmaps(sb, jb_array); 3031da177e4SLinus Torvalds return -1; 3041da177e4SLinus Torvalds } 3051da177e4SLinus Torvalds return 0; 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds 3081da177e4SLinus Torvalds /* 3091da177e4SLinus Torvalds ** find an available list bitmap. 
If you can't find one, flush a commit list 3101da177e4SLinus Torvalds ** and try again 3111da177e4SLinus Torvalds */ 312a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, 313bd4c625cSLinus Torvalds struct reiserfs_journal_list 314bd4c625cSLinus Torvalds *jl) 315bd4c625cSLinus Torvalds { 3161da177e4SLinus Torvalds int i, j; 317a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3181da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 3211da177e4SLinus Torvalds i = journal->j_list_bitmap_index; 3221da177e4SLinus Torvalds journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 3231da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 3241da177e4SLinus Torvalds if (journal->j_list_bitmap[i].journal_list) { 325a9dd3643SJeff Mahoney flush_commit_list(sb, 326bd4c625cSLinus Torvalds journal->j_list_bitmap[i]. 327bd4c625cSLinus Torvalds journal_list, 1); 3281da177e4SLinus Torvalds if (!journal->j_list_bitmap[i].journal_list) { 3291da177e4SLinus Torvalds break; 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds } else { 3321da177e4SLinus Torvalds break; 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds } 3351da177e4SLinus Torvalds if (jb->journal_list) { /* double check to make sure if flushed correctly */ 3361da177e4SLinus Torvalds return NULL; 3371da177e4SLinus Torvalds } 3381da177e4SLinus Torvalds jb->journal_list = jl; 3391da177e4SLinus Torvalds return jb; 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds /* 3431da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list. 
3441da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers 3451da177e4SLinus Torvalds ** returns NULL on failure 3461da177e4SLinus Torvalds */ 347bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 348bd4c625cSLinus Torvalds { 3491da177e4SLinus Torvalds struct reiserfs_journal_cnode *head; 3501da177e4SLinus Torvalds int i; 3511da177e4SLinus Torvalds if (num_cnodes <= 0) { 3521da177e4SLinus Torvalds return NULL; 3531da177e4SLinus Torvalds } 354558feb08SJoe Perches head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); 3551da177e4SLinus Torvalds if (!head) { 3561da177e4SLinus Torvalds return NULL; 3571da177e4SLinus Torvalds } 3581da177e4SLinus Torvalds head[0].prev = NULL; 3591da177e4SLinus Torvalds head[0].next = head + 1; 3601da177e4SLinus Torvalds for (i = 1; i < num_cnodes; i++) { 3611da177e4SLinus Torvalds head[i].prev = head + (i - 1); 3621da177e4SLinus Torvalds head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ 3631da177e4SLinus Torvalds } 3641da177e4SLinus Torvalds head[num_cnodes - 1].next = NULL; 3651da177e4SLinus Torvalds return head; 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds 3681da177e4SLinus Torvalds /* 3691da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure 3701da177e4SLinus Torvalds */ 371a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) 372bd4c625cSLinus Torvalds { 3731da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 374a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3751da177e4SLinus Torvalds 376a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "get_cnode"); 3771da177e4SLinus Torvalds 3781da177e4SLinus Torvalds if (journal->j_cnode_free <= 0) { 3791da177e4SLinus Torvalds return NULL; 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds journal->j_cnode_used++; 3821da177e4SLinus Torvalds journal->j_cnode_free--; 
3831da177e4SLinus Torvalds cn = journal->j_cnode_free_list; 3841da177e4SLinus Torvalds if (!cn) { 3851da177e4SLinus Torvalds return cn; 3861da177e4SLinus Torvalds } 3871da177e4SLinus Torvalds if (cn->next) { 3881da177e4SLinus Torvalds cn->next->prev = NULL; 3891da177e4SLinus Torvalds } 3901da177e4SLinus Torvalds journal->j_cnode_free_list = cn->next; 3911da177e4SLinus Torvalds memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); 3921da177e4SLinus Torvalds return cn; 3931da177e4SLinus Torvalds } 3941da177e4SLinus Torvalds 3951da177e4SLinus Torvalds /* 3961da177e4SLinus Torvalds ** returns a cnode to the free list 3971da177e4SLinus Torvalds */ 398a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb, 399bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 400bd4c625cSLinus Torvalds { 401a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 4021da177e4SLinus Torvalds 403a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "free_cnode"); 4041da177e4SLinus Torvalds 4051da177e4SLinus Torvalds journal->j_cnode_used--; 4061da177e4SLinus Torvalds journal->j_cnode_free++; 4071da177e4SLinus Torvalds /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 4081da177e4SLinus Torvalds cn->next = journal->j_cnode_free_list; 4091da177e4SLinus Torvalds if (journal->j_cnode_free_list) { 4101da177e4SLinus Torvalds journal->j_cnode_free_list->prev = cn; 4111da177e4SLinus Torvalds } 4121da177e4SLinus Torvalds cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ 4131da177e4SLinus Torvalds journal->j_cnode_free_list = cn; 4141da177e4SLinus Torvalds } 4151da177e4SLinus Torvalds 416bd4c625cSLinus Torvalds static void clear_prepared_bits(struct buffer_head *bh) 417bd4c625cSLinus Torvalds { 4181da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 4191da177e4SLinus Torvalds clear_buffer_journal_restore_dirty(bh); 4201da177e4SLinus Torvalds } 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds /* 
return a cnode with same dev, block number and size in table, or null if not found */ 423bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 424bd4c625cSLinus Torvalds super_block 425bd4c625cSLinus Torvalds *sb, 426bd4c625cSLinus Torvalds struct 427bd4c625cSLinus Torvalds reiserfs_journal_cnode 428bd4c625cSLinus Torvalds **table, 4291da177e4SLinus Torvalds long bl) 4301da177e4SLinus Torvalds { 4311da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4321da177e4SLinus Torvalds cn = journal_hash(table, sb, bl); 4331da177e4SLinus Torvalds while (cn) { 4341da177e4SLinus Torvalds if (cn->blocknr == bl && cn->sb == sb) 4351da177e4SLinus Torvalds return cn; 4361da177e4SLinus Torvalds cn = cn->hnext; 4371da177e4SLinus Torvalds } 4381da177e4SLinus Torvalds return (struct reiserfs_journal_cnode *)0; 4391da177e4SLinus Torvalds } 4401da177e4SLinus Torvalds 4411da177e4SLinus Torvalds /* 4421da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated 4431da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 4441da177e4SLinus Torvalds ** being overwritten by a replay after crashing. 4451da177e4SLinus Torvalds ** 4461da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 4471da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 4481da177e4SLinus Torvalds ** sure you never write the block without logging it. 4491da177e4SLinus Torvalds ** 4501da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward. 
4511da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search 4521da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl. Then, we return that 4531da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try. 4541da177e4SLinus Torvalds ** 4551da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't 4561da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal 4571da177e4SLinus Torvalds ** 4581da177e4SLinus Torvalds */ 459a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb, 4603ee16670SJeff Mahoney unsigned int bmap_nr, int bit_nr, int search_all, 461bd4c625cSLinus Torvalds b_blocknr_t * next_zero_bit) 462bd4c625cSLinus Torvalds { 463a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 4641da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4651da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 4661da177e4SLinus Torvalds int i; 4671da177e4SLinus Torvalds unsigned long bl; 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds *next_zero_bit = 0; /* always start this at zero. */ 4701da177e4SLinus Torvalds 471a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal); 4721da177e4SLinus Torvalds /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 
4731da177e4SLinus Torvalds ** if we crash before the transaction that freed it commits, this transaction won't 4741da177e4SLinus Torvalds ** have committed either, and the block will never be written 4751da177e4SLinus Torvalds */ 4761da177e4SLinus Torvalds if (search_all) { 4771da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 478a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal_bitmap); 4791da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 4801da177e4SLinus Torvalds if (jb->journal_list && jb->bitmaps[bmap_nr] && 481bd4c625cSLinus Torvalds test_bit(bit_nr, 482bd4c625cSLinus Torvalds (unsigned long *)jb->bitmaps[bmap_nr]-> 483bd4c625cSLinus Torvalds data)) { 484bd4c625cSLinus Torvalds *next_zero_bit = 485bd4c625cSLinus Torvalds find_next_zero_bit((unsigned long *) 486bd4c625cSLinus Torvalds (jb->bitmaps[bmap_nr]-> 487bd4c625cSLinus Torvalds data), 488a9dd3643SJeff Mahoney sb->s_blocksize << 3, 489bd4c625cSLinus Torvalds bit_nr + 1); 4901da177e4SLinus Torvalds return 1; 4911da177e4SLinus Torvalds } 4921da177e4SLinus Torvalds } 4931da177e4SLinus Torvalds } 4941da177e4SLinus Torvalds 495a9dd3643SJeff Mahoney bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr; 4961da177e4SLinus Torvalds /* is it in any old transactions? */ 497bd4c625cSLinus Torvalds if (search_all 498bd4c625cSLinus Torvalds && (cn = 499a9dd3643SJeff Mahoney get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) { 5001da177e4SLinus Torvalds return 1; 5011da177e4SLinus Torvalds } 5021da177e4SLinus Torvalds 5031da177e4SLinus Torvalds /* is it in the current transaction. 
This should never happen */ 504a9dd3643SJeff Mahoney if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) { 5051da177e4SLinus Torvalds BUG(); 5061da177e4SLinus Torvalds return 1; 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds 509a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal_reusable); 5101da177e4SLinus Torvalds /* safe for reuse */ 5111da177e4SLinus Torvalds return 0; 5121da177e4SLinus Torvalds } 5131da177e4SLinus Torvalds 5141da177e4SLinus Torvalds /* insert cn into table 5151da177e4SLinus Torvalds */ 516bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 517bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 518bd4c625cSLinus Torvalds { 5191da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn_orig; 5201da177e4SLinus Torvalds 5211da177e4SLinus Torvalds cn_orig = journal_hash(table, cn->sb, cn->blocknr); 5221da177e4SLinus Torvalds cn->hnext = cn_orig; 5231da177e4SLinus Torvalds cn->hprev = NULL; 5241da177e4SLinus Torvalds if (cn_orig) { 5251da177e4SLinus Torvalds cn_orig->hprev = cn; 5261da177e4SLinus Torvalds } 5271da177e4SLinus Torvalds journal_hash(table, cn->sb, cn->blocknr) = cn; 5281da177e4SLinus Torvalds } 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds /* lock the current transaction */ 531a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb) 532bd4c625cSLinus Torvalds { 533a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.lock_journal); 5348ebc4232SFrederic Weisbecker 5358ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); 5361da177e4SLinus Torvalds } 5371da177e4SLinus Torvalds 5381da177e4SLinus Torvalds /* unlock the current transaction */ 539a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb) 540bd4c625cSLinus Torvalds { 541a9dd3643SJeff Mahoney mutex_unlock(&SB_JOURNAL(sb)->j_mutex); 5421da177e4SLinus Torvalds } 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds static 
inline void get_journal_list(struct reiserfs_journal_list *jl) 5451da177e4SLinus Torvalds { 5461da177e4SLinus Torvalds jl->j_refcount++; 5471da177e4SLinus Torvalds } 5481da177e4SLinus Torvalds 5491da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s, 5501da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 5511da177e4SLinus Torvalds { 5521da177e4SLinus Torvalds if (jl->j_refcount < 1) { 553c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d", 554bd4c625cSLinus Torvalds jl->j_trans_id, jl->j_refcount); 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds if (--jl->j_refcount == 0) 557d739b42bSPekka Enberg kfree(jl); 5581da177e4SLinus Torvalds } 5591da177e4SLinus Torvalds 5601da177e4SLinus Torvalds /* 5611da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again. 5621da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 5631da177e4SLinus Torvalds ** transaction. 
5641da177e4SLinus Torvalds */ 565a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb, 566bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 567bd4c625cSLinus Torvalds { 5681da177e4SLinus Torvalds 5691da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 5701da177e4SLinus Torvalds if (jb) { 571a9dd3643SJeff Mahoney cleanup_bitmap_list(sb, jb); 5721da177e4SLinus Torvalds } 5731da177e4SLinus Torvalds jl->j_list_bitmap->journal_list = NULL; 5741da177e4SLinus Torvalds jl->j_list_bitmap = NULL; 5751da177e4SLinus Torvalds } 5761da177e4SLinus Torvalds 5771da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s, 578600ed416SJeff Mahoney unsigned int trans_id) 5791da177e4SLinus Torvalds { 5801da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 5811da177e4SLinus Torvalds struct list_head *entry = &journal->j_journal_list; 5821da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 5831da177e4SLinus Torvalds 5841da177e4SLinus Torvalds if (!list_empty(entry)) { 5851da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry->next); 5861da177e4SLinus Torvalds if (jl->j_trans_id <= trans_id) { 5871da177e4SLinus Torvalds return 1; 5881da177e4SLinus Torvalds } 5891da177e4SLinus Torvalds } 5901da177e4SLinus Torvalds return 0; 5911da177e4SLinus Torvalds } 5921da177e4SLinus Torvalds 593398c95bdSChris Mason /* 594398c95bdSChris Mason * If page->mapping was null, we failed to truncate this page for 595398c95bdSChris Mason * some reason. Most likely because it was truncated after being 596398c95bdSChris Mason * logged via data=journal. 597398c95bdSChris Mason * 598398c95bdSChris Mason * This does a check to see if the buffer belongs to one of these 599398c95bdSChris Mason * lost pages before doing the final put_bh. 
* If page->mapping was
 * null, it tries to free buffers on the page, which should make the
 * final page_cache_release drop the page from the lru.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	if (!page->mapping && trylock_page(page)) {
		/* hold the page across put_bh; dropped by page_cache_release below */
		page_cache_get(page);
		put_bh(bh);
		/* mapping is checked again now that the page lock is held */
		if (!page->mapping)
			try_to_free_buffers(page);
		unlock_page(page);
		page_cache_release(page);
	} else {
		put_bh(bh);
	}
}

/*
 * b_end_io handler installed by submit_logged_buffer.  Warns if the buffer
 * is still flagged journaled, records the io result in the uptodate bit,
 * unlocks the buffer and drops a buffer reference via release_buffer_page.
 */
static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	char b[BDEVNAME_SIZE];

	if (buffer_journaled(bh)) {
		reiserfs_warning(NULL, "clm-2084",
				 "pinned buffer %lu:%s sent to disk",
				 bh->b_blocknr, bdevname(bh->b_bdev, b));
	}
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);

	unlock_buffer(bh);
	release_buffer_page(bh);
}

/*
 * b_end_io handler installed by submit_ordered_buffer: records the io
 * result in the uptodate bit, unlocks the buffer and drops a reference.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
	put_bh(bh);
}

/*
 * submit one logged buffer for writing.  Takes a buffer reference first
 * (reiserfs_end_buffer_io_sync drops one on completion), clears the
 * journal_new and dirty bits, and BUGs if the journal_test bit was not set
 * or the buffer is not uptodate.
 */
static void submit_logged_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_buffer_io_sync;
	clear_buffer_journal_new(bh);
	clear_buffer_dirty(bh);
	if (!test_clear_buffer_journal_test(bh))
		BUG();
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(WRITE, bh);
}

/*
 * submit one ordered buffer for writing.  Takes a buffer reference first
 * (reiserfs_end_ordered_io drops one on completion), clears the dirty bit
 * and BUGs if the buffer is not uptodate.
 */
static void submit_ordered_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_ordered_io;
	clear_buffer_dirty(bh);
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(WRITE, bh);
}

/* a small batch of buffers that is submitted in one go */
#define CHUNK_SIZE 32
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];
	int nr;			/* number of valid entries in bh[] */
};

/* submit every buffer in the chunk as a logged buffer and empty the chunk */
static void write_chunk(struct buffer_chunk *chunk)
{
	int i;
	for (i = 0; i < chunk->nr; i++) {
		submit_logged_buffer(chunk->bh[i]);
	}
	chunk->nr = 0;
}

/* submit every buffer in the chunk as an ordered buffer and empty the chunk */
static void write_ordered_chunk(struct buffer_chunk *chunk)
{
	int i;
	for (i = 0; i < chunk->nr; i++) {
		submit_ordered_buffer(chunk->bh[i]);
	}
	chunk->nr = 0;
}

/*
 * append bh to the chunk.  When that fills the chunk, fn is called on it
 * with 'lock' (if one was passed) dropped around the call.
 *
 * returns 1 if fn was called, 0 otherwise.  BUGs if the chunk is already
 * full on entry.
 */
static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
			spinlock_t * lock, void (fn) (struct buffer_chunk *))
{
	int ret = 0;
	BUG_ON(chunk->nr >= CHUNK_SIZE);
	chunk->bh[chunk->nr++] = bh;
	if (chunk->nr >= CHUNK_SIZE) {
		ret = 1;
		if (lock)
			spin_unlock(lock);
		fn(chunk);
		if (lock)
			spin_lock(lock);
	}
	return ret;
}

/* number of reiserfs_jh structs handed out by alloc_jh and not yet freed */
static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);

/*
 * allocate a reiserfs_jh with GFP_NOFS.  Never returns NULL: on failure it
 * yields and tries again.
 */
static struct reiserfs_jh *alloc_jh(void)
{
	struct reiserfs_jh *jh;
	while (1) {
		jh = kmalloc(sizeof(*jh), GFP_NOFS);
		if (jh) {
			atomic_inc(&nr_reiserfs_jh);
			return jh;
		}
		yield();
	}
}

/*
 * we want to free the jh when the buffer has been written
 * and waited on
 *
 * Does nothing when bh has no jh attached.  Otherwise the jh is unlinked
 * from its list and freed, and one buffer reference is dropped (__add_jh
 * takes one when it attaches a new jh).
 */
void reiserfs_free_jh(struct buffer_head *bh)
{
	struct reiserfs_jh *jh;

	jh = bh->b_private;
	if (jh) {
		bh->b_private = NULL;
		jh->bh = NULL;
		list_del_init(&jh->list);
		kfree(jh);
		if (atomic_read(&nr_reiserfs_jh) <= 0)
			BUG();
		atomic_dec(&nr_reiserfs_jh);
		put_bh(bh);
	}
}

/*
 * put bh on one of the buffer lists of the current journal list
 * (j->j_current_jl): j_tail_bh_list when 'tail' is non-zero, j_bh_list
 * otherwise.  A jh already attached to the buffer is reused and moved;
 * otherwise a new one is allocated and a buffer reference is taken.
 * Always returns 0.
 */
static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
			   int tail)
{
	struct reiserfs_jh *jh;

	if (bh->b_private) {
		spin_lock(&j->j_dirty_buffers_lock);
		/* the jh may have been freed before we got the lock */
		if (!bh->b_private) {
			spin_unlock(&j->j_dirty_buffers_lock);
			goto no_jh;
		}
		jh = bh->b_private;
		list_del_init(&jh->list);
	} else {
	      no_jh:
		get_bh(bh);
		jh = alloc_jh();
		spin_lock(&j->j_dirty_buffers_lock);
		/* buffer must be locked for __add_jh, shouldn't be able to have
		 * two adds at the same time
		 */
		BUG_ON(bh->b_private);
		jh->bh = bh;
		bh->b_private = jh;
	}
	jh->jl = j->j_current_jl;
	if (tail)
		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
	else {
		list_add_tail(&jh->list, &jh->jl->j_bh_list);
	}
	spin_unlock(&j->j_dirty_buffers_lock);
	return 0;
}

/* add bh to the tail buffer list of the current transaction of inode's fs */
int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
}
/* add bh to the ordered buffer list of the current transaction of inode's fs */
int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
}

/* gets a struct reiserfs_jh * from its list head */
#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)

/*
 * write out the buffers whose jh is on 'list' and wait for them.
 *
 * 'lock' is taken here and is dropped around every wait and every
 * submission.  'jl' is not referenced in the body.
 *
 * First loop, per buffer at the head of 'list':
 *   - buffer lock not available and buffer clean: moved to the private
 *     list 'tmp'
 *   - buffer lock not available and buffer dirty: the pending chunk is
 *     submitted, the buffer is waited on and stays on 'list', so it is
 *     looked at again
 *   - buffer locked by us and dirty: moved to 'tmp' and batched for
 *     writing through add_to_chunk/write_ordered_chunk
 *   - buffer locked by us and clean: its jh is freed and it is unlocked
 * Second loop: every buffer on 'tmp' gets its jh freed and is waited on.
 *
 * returns j->j_errno as read on entry, or -EIO if a dirty buffer was not
 * uptodate or a buffer was not uptodate after the wait.
 */
static int write_ordered_buffers(spinlock_t * lock,
				 struct reiserfs_journal *j,
				 struct reiserfs_journal_list *jl,
				 struct list_head *list)
{
	struct buffer_head *bh;
	struct reiserfs_jh *jh;
	int ret = j->j_errno;
	struct buffer_chunk chunk;
	struct list_head tmp;
	INIT_LIST_HEAD(&tmp);

	chunk.nr = 0;
	spin_lock(lock);
	while (!list_empty(list)) {
		jh = JH_ENTRY(list->next);
		bh = jh->bh;
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			if (!buffer_dirty(bh)) {
				list_move(&jh->list, &tmp);
				goto loop_next;
			}
			spin_unlock(lock);
			if (chunk.nr)
				write_ordered_chunk(&chunk);
			wait_on_buffer(bh);
			cond_resched();
			spin_lock(lock);
			goto loop_next;
		}
		/* in theory, dirty non-uptodate buffers should never get here,
		 * but the upper layer io error paths still have a few quirks.
		 * Handle them here as gracefully as we can
		 */
		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
			clear_buffer_dirty(bh);
			ret = -EIO;
		}
		if (buffer_dirty(bh)) {
			list_move(&jh->list, &tmp);
			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
		} else {
			reiserfs_free_jh(bh);
			unlock_buffer(bh);
		}
	      loop_next:
		put_bh(bh);
		cond_resched_lock(lock);
	}
	/* submit whatever is left in a partially filled chunk */
	if (chunk.nr) {
		spin_unlock(lock);
		write_ordered_chunk(&chunk);
		spin_lock(lock);
	}
	while (!list_empty(&tmp)) {
		jh = JH_ENTRY(tmp.prev);
		bh = jh->bh;
		/* keep bh alive past reiserfs_free_jh, which drops a reference */
		get_bh(bh);
		reiserfs_free_jh(bh);

		if (buffer_locked(bh)) {
			spin_unlock(lock);
			wait_on_buffer(bh);
			spin_lock(lock);
		}
		if (!buffer_uptodate(bh)) {
			ret = -EIO;
		}
		/* ugly interaction with invalidatepage here.
		 * reiserfs_invalidate_page will pin any buffer that has a valid
		 * journal head from an older transaction.  If someone else sets
		 * our buffer dirty after we write it in the first loop, and
		 * then someone truncates the page away, nobody will ever write
		 * the buffer.  We're safe if we write the page one last time
		 * after freeing the journal header.
		 */
		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
			spin_unlock(lock);
			ll_rw_block(WRITE, 1, &bh);
			spin_lock(lock);
		}
		put_bh(bh);
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
	return ret;
}

/*
 * commit the journal lists that are older than jl, by calling
 * flush_commit_list with flushall == 0 on each one that still has
 * j_commit_left != 0.
 *
 * returns 1 if jl itself is no longer alive after one of those flushes,
 * 0 otherwise.
 */
static int flush_older_commits(struct super_block *s,
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal_list *first_jl;
	struct list_head *entry;
	unsigned int trans_id = jl->j_trans_id;
	unsigned int other_trans_id;
	unsigned int first_trans_id;

      find_first:
	/*
	 * first we walk backwards to find the oldest uncommitted transaction
	 */
	first_jl = jl;
	entry = jl->j_list.prev;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		if (entry == &journal->j_journal_list ||
		    atomic_read(&other_jl->j_older_commits_done))
			break;

		first_jl = other_jl;
		entry = other_jl->j_list.prev;
	}

	/* if we didn't find any older uncommitted transactions, return now */
	if (first_jl == jl) {
		return 0;
	}

	/* NOTE(review): first_trans_id is assigned here but never read */
	first_trans_id = first_jl->j_trans_id;

	/* now walk forward from first_jl, committing everything older than jl */
	entry = &first_jl->j_list;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		other_trans_id = other_jl->j_trans_id;

		if (other_trans_id < trans_id) {
			if (atomic_read(&other_jl->j_commit_left) != 0) {
				flush_commit_list(s, other_jl, 0);

				/* list we were called with is gone, return */
				if (!journal_list_still_alive(s, trans_id))
					return 1;

				/* the one we just flushed is gone, this means all
				 * older lists are also gone, so first_jl is no longer
				 * valid either.  Go back to the beginning.
				 */
				if (!journal_list_still_alive
				    (s, other_trans_id)) {
					goto find_first;
				}
			}
			entry = entry->next;
			if (entry == &journal->j_journal_list)
				return 0;
		} else {
			return 0;
		}
	}
	return 0;
}

/*
 * if j_async_throttle is non-zero, drop the write lock, sleep in
 * congestion_wait for up to HZ / 10 and take the write lock again.
 * Always returns 0.
 */
static int reiserfs_async_progress_wait(struct super_block *s)
{
	struct reiserfs_journal *j = SB_JOURNAL(s);

	if (atomic_read(&j->j_async_throttle)) {
		int depth;

		depth = reiserfs_write_unlock_nested(s);
		congestion_wait(BLK_RW_ASYNC, HZ / 10);
		reiserfs_write_lock_nested(s, depth);
	}

	return 0;
}

/*
** if this journal list still has commit blocks unflushed, send them to disk.
**
** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
** Before the commit block can be written, every other log block must be safely on disk
**
** s        - super block of the filesystem
** jl       - journal list whose log blocks and commit block should reach the disk
** flushall - when non-zero, older lists are committed first (flush_older_commits)
**            and j_older_commits_done is set on jl once it is committed
**
** returns 0 when the list was committed or there was nothing left to do.
** Any other value is an error code; reiserfs_abort has been called with it.
*/
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall)
{
	int i;
	b_blocknr_t bn;
	struct buffer_head *tbh = NULL;
	unsigned int trans_id = jl->j_trans_id;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	int retval = 0;
	int write_len;
	int depth;

	reiserfs_check_lock_depth(s, "flush_commit_list");

	if (atomic_read(&jl->j_older_commits_done)) {
		return 0;
	}

	/* before we can put our commit blocks on disk, we have to make sure everyone older than
	 ** us is on disk too
	 */
	BUG_ON(jl->j_len <= 0);
	BUG_ON(trans_id == journal->j_trans_id);

	/* keep jl around while we work on it; dropped at put_jl */
	get_journal_list(jl);
	if (flushall) {
		if (flush_older_commits(s, jl) == 1) {
			/* list disappeared during flush_older_commits.  return */
			goto put_jl;
		}
	}

	/* make sure nobody is trying to flush this one at the same time */
	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);

	if (!journal_list_still_alive(s, trans_id)) {
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}
	BUG_ON(jl->j_trans_id == 0);

	/* this commit is done, exit */
	if (atomic_read(&(jl->j_commit_left)) <= 0) {
		if (flushall) {
			atomic_set(&(jl->j_older_commits_done), 1);
		}
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}

	/* the ordered data buffers of this transaction go to disk first */
	if (!list_empty(&jl->j_bh_list)) {
		int ret;

		/*
		 * We might sleep in numerous places inside
		 * write_ordered_buffers. Relax the write lock.
		 */
		depth = reiserfs_write_unlock_nested(s);
		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
					    journal, jl, &jl->j_bh_list);
		if (ret < 0 && retval == 0)
			retval = ret;
		reiserfs_write_lock_nested(s, depth);
	}
	BUG_ON(!list_empty(&jl->j_bh_list));
	/*
	 * for the description block and all the log blocks, submit any buffers
	 * that haven't already reached the disk.  Try to write at least 256
	 * log blocks. later on, we will only wait on blocks that correspond
	 * to this transaction, but while we're unplugging we might as well
	 * get a chunk of data on there.
	 */
	atomic_inc(&journal->j_async_throttle);
	write_len = jl->j_len + 1;
	if (write_len < 256)
		write_len = 256;
	for (i = 0 ; i < write_len ; i++) {
		/* log block numbers wrap around at the end of the journal area */
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
		    SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);
		if (tbh) {
			if (buffer_dirty(tbh)) {
				depth = reiserfs_write_unlock_nested(s);
				ll_rw_block(WRITE, 1, &tbh);
				reiserfs_write_lock_nested(s, depth);
			}
			put_bh(tbh) ;
		}
	}
	atomic_dec(&journal->j_async_throttle);

	/*
	 * wait for the j_len + 1 blocks of this transaction.
	 * NOTE(review): unlike the loop above, tbh is not checked for NULL
	 * here; presumably the reference from do_journal_end mentioned below
	 * keeps each of these buffers findable -- confirm.
	 */
	for (i = 0; i < (jl->j_len + 1); i++) {
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);

		depth = reiserfs_write_unlock_nested(s);
		__wait_on_buffer(tbh);
		reiserfs_write_lock_nested(s, depth);
		// since we're using ll_rw_blk above, it might have skipped over
		// a locked buffer.  Double check here
		//
		/* redundant, sync_dirty_buffer() checks */
		if (buffer_dirty(tbh)) {
			depth = reiserfs_write_unlock_nested(s);
			sync_dirty_buffer(tbh);
			reiserfs_write_lock_nested(s, depth);
		}
		if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK
			reiserfs_warning(s, "journal-601",
					 "buffer write failed");
#endif
			retval = -EIO;
		}
		put_bh(tbh);	/* once for journal_find_get_block */
		put_bh(tbh);	/* once due to original getblk in do_journal_end */
		atomic_dec(&(jl->j_commit_left));
	}

	/* only the commit block itself may be left at this point */
	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);

	/* If there was a write error in the journal - we can't commit
	 * this transaction - it will be invalid and, if successful,
	 * will just end up propagating the write error out to
	 * the file system. */
	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
		if (buffer_dirty(jl->j_commit_bh))
			BUG();
		mark_buffer_dirty(jl->j_commit_bh) ;
		depth = reiserfs_write_unlock_nested(s);
		if (reiserfs_barrier_flush(s))
			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
		else
			sync_dirty_buffer(jl->j_commit_bh);
		reiserfs_write_lock_nested(s, depth);
	}

	/* the commit block is not uptodate: report that as an io error */
	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
#ifdef CONFIG_REISERFS_CHECK
		reiserfs_warning(s, "journal-615", "buffer write failed");
#endif
		retval = -EIO;
	}
	bforget(jl->j_commit_bh);
	/*
	 * warn when trans ids are not committed strictly one after another.
	 * NOTE(review): the format uses %lu while trans_id above is an
	 * unsigned int -- confirm the types of j_last_commit_id and
	 * j_trans_id match the format.
	 */
	if (journal->j_last_commit_id != 0 &&
	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
				 journal->j_last_commit_id, jl->j_trans_id);
	}
	journal->j_last_commit_id = jl->j_trans_id;

	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
	cleanup_freed_for_journal_list(s, jl);

	/* an error recorded on the journal counts even if our own io was fine */
	retval = retval ? retval : journal->j_errno;

	/* mark the metadata dirty */
	if (!retval)
		dirty_one_transaction(s, jl);
	atomic_dec(&(jl->j_commit_left));

	if (flushall) {
		atomic_set(&(jl->j_older_commits_done), 1);
	}
	mutex_unlock(&jl->j_commit_mutex);
      put_jl:
	put_journal_list(s, jl);

	if (retval)
		reiserfs_abort(s, retval, "Journal write error in %s",
			       __func__);
	return retval;
}

/*
** flush_journal_list frequently needs to find a newer transaction for a given block.
This does that, or 11471da177e4SLinus Torvalds ** returns NULL if it can't find anything 11481da177e4SLinus Torvalds */ 1149bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1150bd4c625cSLinus Torvalds reiserfs_journal_cnode 1151bd4c625cSLinus Torvalds *cn) 1152bd4c625cSLinus Torvalds { 11531da177e4SLinus Torvalds struct super_block *sb = cn->sb; 11541da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 11551da177e4SLinus Torvalds 11561da177e4SLinus Torvalds cn = cn->hprev; 11571da177e4SLinus Torvalds while (cn) { 11581da177e4SLinus Torvalds if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 11591da177e4SLinus Torvalds return cn->jlist; 11601da177e4SLinus Torvalds } 11611da177e4SLinus Torvalds cn = cn->hprev; 11621da177e4SLinus Torvalds } 11631da177e4SLinus Torvalds return NULL; 11641da177e4SLinus Torvalds } 11651da177e4SLinus Torvalds 1166bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *, 1167bd4c625cSLinus Torvalds struct reiserfs_journal_cnode **, 1168bd4c625cSLinus Torvalds struct reiserfs_journal_list *, unsigned long, 1169bd4c625cSLinus Torvalds int); 11701da177e4SLinus Torvalds 11711da177e4SLinus Torvalds /* 11721da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the 11731da177e4SLinus Torvalds ** journal list for this transaction. Aside from freeing the cnode, this also allows the 11741da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted. 
11751da177e4SLinus Torvalds */ 1176a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb, 1177bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, 1178bd4c625cSLinus Torvalds int debug) 1179bd4c625cSLinus Torvalds { 1180a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 11811da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 11821da177e4SLinus Torvalds cn = jl->j_realblock; 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds /* which is better, to lock once around the whole loop, or 11851da177e4SLinus Torvalds ** to lock for each call to remove_journal_hash? 11861da177e4SLinus Torvalds */ 11871da177e4SLinus Torvalds while (cn) { 11881da177e4SLinus Torvalds if (cn->blocknr != 0) { 11891da177e4SLinus Torvalds if (debug) { 1190a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2201", 1191bd4c625cSLinus Torvalds "block %u, bh is %d, state %ld", 1192bd4c625cSLinus Torvalds cn->blocknr, cn->bh ? 1 : 0, 1193bd4c625cSLinus Torvalds cn->state); 11941da177e4SLinus Torvalds } 11951da177e4SLinus Torvalds cn->state = 0; 1196a9dd3643SJeff Mahoney remove_journal_hash(sb, journal->j_list_hash_table, 1197bd4c625cSLinus Torvalds jl, cn->blocknr, 1); 11981da177e4SLinus Torvalds } 11991da177e4SLinus Torvalds last = cn; 12001da177e4SLinus Torvalds cn = cn->next; 1201a9dd3643SJeff Mahoney free_cnode(sb, last); 12021da177e4SLinus Torvalds } 12031da177e4SLinus Torvalds jl->j_realblock = NULL; 12041da177e4SLinus Torvalds } 12051da177e4SLinus Torvalds 12061da177e4SLinus Torvalds /* 12071da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 12081da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 12091da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks. 
** called by flush_journal_list, before it calls remove_all_from_journal_list
**
** sb       - super block of the filesystem
** offset   - value to store as the first unflushed offset
** trans_id - value to store as the last flushed trans id
**
** returns 0 on success, or when trans_id is older than the last flushed id
** (nothing is written then).  returns -EIO if the journal is aborted or the
** header buffer is not uptodate after being waited on or written.
*/
static int _update_journal_header_block(struct super_block *sb,
					unsigned long offset,
					unsigned int trans_id)
{
	struct reiserfs_journal_header *jh;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	int depth;

	if (reiserfs_is_journal_aborted(journal))
		return -EIO;

	if (trans_id >= journal->j_last_flush_trans_id) {
		/* io on the header buffer is in flight: wait for it without the write lock */
		if (buffer_locked((journal->j_header_bh))) {
			depth = reiserfs_write_unlock_nested(sb);
			__wait_on_buffer(journal->j_header_bh);
			reiserfs_write_lock_nested(sb, depth);
			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
				reiserfs_warning(sb, "journal-699",
						 "buffer write failed");
#endif
				return -EIO;
			}
		}
		/* update the in-memory copy, then the little endian copy in the buffer */
		journal->j_last_flush_trans_id = trans_id;
		journal->j_first_unflushed_offset = offset;
		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
							b_data);
		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
		jh->j_first_unflushed_offset = cpu_to_le32(offset);
		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);

		set_buffer_dirty(journal->j_header_bh);
		depth = reiserfs_write_unlock_nested(sb);

		if (reiserfs_barrier_flush(sb))
			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
		else
			sync_dirty_buffer(journal->j_header_bh);

		reiserfs_write_lock_nested(sb, depth);
		if (!buffer_uptodate(journal->j_header_bh)) {
			/* NOTE(review): the message says "replay" but this path is a header write */
			reiserfs_warning(sb, "journal-837",
					 "IO error during journal replay");
			return -EIO;
		}
	}
	return 0;
}

/* plain wrapper: passes its arguments on to _update_journal_header_block */
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	return _update_journal_header_block(sb, offset, trans_id);
}

/*
** flush any and all journal lists older than you are
** can only be called from flush_journal_list
**
** keeps flushing the first entry of j_journal_list (with flushall == 0) for
** as long as its trans id is lower than jl's.  always returns 0.
*/
static int flush_older_journal_lists(struct super_block *sb,
				     struct reiserfs_journal_list *jl)
{
	struct list_head *entry;
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	unsigned int trans_id = jl->j_trans_id;

	/* we know we are the only ones flushing things, no extra race
	 * protection is required.
	 */
      restart:
	entry = journal->j_journal_list.next;
	/* Did we wrap? */
	if (entry == &journal->j_journal_list)
		return 0;
	other_jl = JOURNAL_LIST_ENTRY(entry);
	if (other_jl->j_trans_id < trans_id) {
		BUG_ON(other_jl->j_refcount <= 0);
		/* do not flush all */
		flush_journal_list(sb, other_jl, 0);

		/* other_jl is now deleted from the list */
		goto restart;
	}
	return 0;
}

/*
 * take jl off the working list if it is on one, and lower the
 * journal's count of work lists to match.
 */
static void del_from_work_list(struct super_block *s,
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	if (!list_empty(&jl->j_working_list)) {
		list_del_init(&jl->j_working_list);
		journal->j_num_work_lists--;
	}
}

/* flush a journal list, both commit and real blocks
**
** always set flushall to 1, unless you are calling from inside
** flush_journal_list
**
** IMPORTANT.  This can only be called while there are no journal writers,
** and the journal is locked.
That means it can only be called from 13191da177e4SLinus Torvalds ** do_journal_end, or by journal_release 13201da177e4SLinus Torvalds */ 13211da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s, 1322bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall) 1323bd4c625cSLinus Torvalds { 13241da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 13251da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 13261da177e4SLinus Torvalds int count; 13271da177e4SLinus Torvalds int was_jwait = 0; 13281da177e4SLinus Torvalds int was_dirty = 0; 13291da177e4SLinus Torvalds struct buffer_head *saved_bh; 13301da177e4SLinus Torvalds unsigned long j_len_saved = jl->j_len; 13311da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 13321da177e4SLinus Torvalds int err = 0; 1333278f6679SJeff Mahoney int depth; 13341da177e4SLinus Torvalds 13351da177e4SLinus Torvalds BUG_ON(j_len_saved <= 0); 13361da177e4SLinus Torvalds 13371da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) != 0) { 133845b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2048", "called with wcount %d", 13391da177e4SLinus Torvalds atomic_read(&journal->j_wcount)); 13401da177e4SLinus Torvalds } 13411da177e4SLinus Torvalds 13421da177e4SLinus Torvalds /* if flushall == 0, the lock is already held */ 13431da177e4SLinus Torvalds if (flushall) { 13448ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); 1345afe70259SJeff Mahoney } else if (mutex_trylock(&journal->j_flush_mutex)) { 13461da177e4SLinus Torvalds BUG(); 13471da177e4SLinus Torvalds } 13481da177e4SLinus Torvalds 13491da177e4SLinus Torvalds count = 0; 13501da177e4SLinus Torvalds if (j_len_saved > journal->j_trans_max) { 1351c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu", 1352bd4c625cSLinus Torvalds j_len_saved, jl->j_trans_id); 13531da177e4SLinus Torvalds return 0; 13541da177e4SLinus Torvalds } 13551da177e4SLinus Torvalds 
13561da177e4SLinus Torvalds /* if all the work is already done, get out of here */ 13571da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 13581da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 13591da177e4SLinus Torvalds goto flush_older_and_return; 13601da177e4SLinus Torvalds } 13611da177e4SLinus Torvalds 13621da177e4SLinus Torvalds /* start by putting the commit list on disk. This will also flush 13631da177e4SLinus Torvalds ** the commit lists of any olders transactions 13641da177e4SLinus Torvalds */ 13651da177e4SLinus Torvalds flush_commit_list(s, jl, 1); 13661da177e4SLinus Torvalds 1367bd4c625cSLinus Torvalds if (!(jl->j_state & LIST_DIRTY) 1368bd4c625cSLinus Torvalds && !reiserfs_is_journal_aborted(journal)) 13691da177e4SLinus Torvalds BUG(); 13701da177e4SLinus Torvalds 13711da177e4SLinus Torvalds /* are we done now? */ 13721da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 13731da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 13741da177e4SLinus Torvalds goto flush_older_and_return; 13751da177e4SLinus Torvalds } 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds /* loop through each cnode, see if we need to write it, 13781da177e4SLinus Torvalds ** or wait on a more recent transaction, or just ignore it 13791da177e4SLinus Torvalds */ 13801da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) != 0) { 1381c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-844", "journal list is flushing, " 1382c3a9c210SJeff Mahoney "wcount is not 0"); 13831da177e4SLinus Torvalds } 13841da177e4SLinus Torvalds cn = jl->j_realblock; 13851da177e4SLinus Torvalds while (cn) { 13861da177e4SLinus Torvalds was_jwait = 0; 13871da177e4SLinus Torvalds was_dirty = 0; 13881da177e4SLinus Torvalds saved_bh = NULL; 13891da177e4SLinus Torvalds /* blocknr of 0 is no longer in the hash, ignore it */ 13901da177e4SLinus Torvalds if (cn->blocknr == 0) { 13911da177e4SLinus Torvalds goto free_cnode; 13921da177e4SLinus Torvalds } 
13931da177e4SLinus Torvalds 13941da177e4SLinus Torvalds /* This transaction failed commit. Don't write out to the disk */ 13951da177e4SLinus Torvalds if (!(jl->j_state & LIST_DIRTY)) 13961da177e4SLinus Torvalds goto free_cnode; 13971da177e4SLinus Torvalds 13981da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 13991da177e4SLinus Torvalds /* the order is important here. We check pjl to make sure we 14001da177e4SLinus Torvalds ** don't clear BH_JDirty_wait if we aren't the one writing this 14011da177e4SLinus Torvalds ** block to disk 14021da177e4SLinus Torvalds */ 14031da177e4SLinus Torvalds if (!pjl && cn->bh) { 14041da177e4SLinus Torvalds saved_bh = cn->bh; 14051da177e4SLinus Torvalds 14061da177e4SLinus Torvalds /* we do this to make sure nobody releases the buffer while 14071da177e4SLinus Torvalds ** we are working with it 14081da177e4SLinus Torvalds */ 14091da177e4SLinus Torvalds get_bh(saved_bh); 14101da177e4SLinus Torvalds 14111da177e4SLinus Torvalds if (buffer_journal_dirty(saved_bh)) { 14121da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 14131da177e4SLinus Torvalds was_jwait = 1; 14141da177e4SLinus Torvalds was_dirty = 1; 14151da177e4SLinus Torvalds } else if (can_dirty(cn)) { 14161da177e4SLinus Torvalds /* everything with !pjl && jwait should be writable */ 14171da177e4SLinus Torvalds BUG(); 14181da177e4SLinus Torvalds } 14191da177e4SLinus Torvalds } 14201da177e4SLinus Torvalds 14211da177e4SLinus Torvalds /* if someone has this block in a newer transaction, just make 14220779bf2dSMatt LaPlante ** sure they are committed, and don't try writing it to disk 14231da177e4SLinus Torvalds */ 14241da177e4SLinus Torvalds if (pjl) { 14251da177e4SLinus Torvalds if (atomic_read(&pjl->j_commit_left)) 14261da177e4SLinus Torvalds flush_commit_list(s, pjl, 1); 14271da177e4SLinus Torvalds goto free_cnode; 14281da177e4SLinus Torvalds } 14291da177e4SLinus Torvalds 14301da177e4SLinus Torvalds /* bh == NULL when the block got to disk on its own, OR, 14311da177e4SLinus Torvalds 
** the block got freed in a future transaction 14321da177e4SLinus Torvalds */ 14331da177e4SLinus Torvalds if (saved_bh == NULL) { 14341da177e4SLinus Torvalds goto free_cnode; 14351da177e4SLinus Torvalds } 14361da177e4SLinus Torvalds 14371da177e4SLinus Torvalds /* this should never happen. kupdate_one_transaction has this list 14381da177e4SLinus Torvalds ** locked while it works, so we should never see a buffer here that 14391da177e4SLinus Torvalds ** is not marked JDirty_wait 14401da177e4SLinus Torvalds */ 14411da177e4SLinus Torvalds if ((!was_jwait) && !buffer_locked(saved_bh)) { 144245b03d5eSJeff Mahoney reiserfs_warning(s, "journal-813", 144345b03d5eSJeff Mahoney "BAD! buffer %llu %cdirty %cjwait, " 14441da177e4SLinus Torvalds "not in a newer tranasction", 1445bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1446bd4c625cSLinus Torvalds b_blocknr, was_dirty ? ' ' : '!', 1447bd4c625cSLinus Torvalds was_jwait ? ' ' : '!'); 14481da177e4SLinus Torvalds } 14491da177e4SLinus Torvalds if (was_dirty) { 14501da177e4SLinus Torvalds /* we inc again because saved_bh gets decremented at free_cnode */ 14511da177e4SLinus Torvalds get_bh(saved_bh); 14521da177e4SLinus Torvalds set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 14531da177e4SLinus Torvalds lock_buffer(saved_bh); 14541da177e4SLinus Torvalds BUG_ON(cn->blocknr != saved_bh->b_blocknr); 14551da177e4SLinus Torvalds if (buffer_dirty(saved_bh)) 14561da177e4SLinus Torvalds submit_logged_buffer(saved_bh); 14571da177e4SLinus Torvalds else 14581da177e4SLinus Torvalds unlock_buffer(saved_bh); 14591da177e4SLinus Torvalds count++; 14601da177e4SLinus Torvalds } else { 146145b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2082", 146245b03d5eSJeff Mahoney "Unable to flush buffer %llu in %s", 1463bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1464fbe5498bSHarvey Harrison b_blocknr, __func__); 14651da177e4SLinus Torvalds } 14661da177e4SLinus Torvalds free_cnode: 14671da177e4SLinus Torvalds last = cn; 14681da177e4SLinus Torvalds cn = 
cn->next; 14691da177e4SLinus Torvalds if (saved_bh) { 14701da177e4SLinus Torvalds /* we incremented this to keep others from taking the buffer head away */ 14711da177e4SLinus Torvalds put_bh(saved_bh); 14721da177e4SLinus Torvalds if (atomic_read(&(saved_bh->b_count)) < 0) { 147345b03d5eSJeff Mahoney reiserfs_warning(s, "journal-945", 147445b03d5eSJeff Mahoney "saved_bh->b_count < 0"); 14751da177e4SLinus Torvalds } 14761da177e4SLinus Torvalds } 14771da177e4SLinus Torvalds } 14781da177e4SLinus Torvalds if (count > 0) { 14791da177e4SLinus Torvalds cn = jl->j_realblock; 14801da177e4SLinus Torvalds while (cn) { 14811da177e4SLinus Torvalds if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 14821da177e4SLinus Torvalds if (!cn->bh) { 1483c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-1011", 1484c3a9c210SJeff Mahoney "cn->bh is NULL"); 14851da177e4SLinus Torvalds } 14868ebc4232SFrederic Weisbecker 1487278f6679SJeff Mahoney depth = reiserfs_write_unlock_nested(s); 1488278f6679SJeff Mahoney __wait_on_buffer(cn->bh); 1489278f6679SJeff Mahoney reiserfs_write_lock_nested(s, depth); 14908ebc4232SFrederic Weisbecker 14911da177e4SLinus Torvalds if (!cn->bh) { 1492c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-1012", 1493c3a9c210SJeff Mahoney "cn->bh is NULL"); 14941da177e4SLinus Torvalds } 14951da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(cn->bh))) { 14961da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 149745b03d5eSJeff Mahoney reiserfs_warning(s, "journal-949", 149845b03d5eSJeff Mahoney "buffer write failed"); 14991da177e4SLinus Torvalds #endif 15001da177e4SLinus Torvalds err = -EIO; 15011da177e4SLinus Torvalds } 15021da177e4SLinus Torvalds /* note, we must clear the JDirty_wait bit after the up to date 15031da177e4SLinus Torvalds ** check, otherwise we race against our flushpage routine 15041da177e4SLinus Torvalds */ 1505bd4c625cSLinus Torvalds BUG_ON(!test_clear_buffer_journal_dirty 1506bd4c625cSLinus Torvalds (cn->bh)); 15071da177e4SLinus Torvalds 1508398c95bdSChris 
Mason /* drop one ref for us */ 15091da177e4SLinus Torvalds put_bh(cn->bh); 1510398c95bdSChris Mason /* drop one ref for journal_mark_dirty */ 1511398c95bdSChris Mason release_buffer_page(cn->bh); 15121da177e4SLinus Torvalds } 15131da177e4SLinus Torvalds cn = cn->next; 15141da177e4SLinus Torvalds } 15151da177e4SLinus Torvalds } 15161da177e4SLinus Torvalds 15171da177e4SLinus Torvalds if (err) 1518bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 1519bd4c625cSLinus Torvalds "Write error while pushing transaction to disk in %s", 1520fbe5498bSHarvey Harrison __func__); 15211da177e4SLinus Torvalds flush_older_and_return: 15221da177e4SLinus Torvalds 15231da177e4SLinus Torvalds /* before we can update the journal header block, we _must_ flush all 15241da177e4SLinus Torvalds ** real blocks from all older transactions to disk. This is because 15251da177e4SLinus Torvalds ** once the header block is updated, this transaction will not be 15261da177e4SLinus Torvalds ** replayed after a crash 15271da177e4SLinus Torvalds */ 15281da177e4SLinus Torvalds if (flushall) { 15291da177e4SLinus Torvalds flush_older_journal_lists(s, jl); 15301da177e4SLinus Torvalds } 15311da177e4SLinus Torvalds 15321da177e4SLinus Torvalds err = journal->j_errno; 15331da177e4SLinus Torvalds /* before we can remove everything from the hash tables for this 15341da177e4SLinus Torvalds ** transaction, we must make sure it can never be replayed 15351da177e4SLinus Torvalds ** 15361da177e4SLinus Torvalds ** since we are only called from do_journal_end, we know for sure there 15371da177e4SLinus Torvalds ** are no allocations going on while we are flushing journal lists. 
So, 15381da177e4SLinus Torvalds ** we only need to update the journal header block for the last list 15391da177e4SLinus Torvalds ** being flushed 15401da177e4SLinus Torvalds */ 15411da177e4SLinus Torvalds if (!err && flushall) { 1542bd4c625cSLinus Torvalds err = 1543bd4c625cSLinus Torvalds update_journal_header_block(s, 1544bd4c625cSLinus Torvalds (jl->j_start + jl->j_len + 1545bd4c625cSLinus Torvalds 2) % SB_ONDISK_JOURNAL_SIZE(s), 1546bd4c625cSLinus Torvalds jl->j_trans_id); 15471da177e4SLinus Torvalds if (err) 1548bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 1549bd4c625cSLinus Torvalds "Write error while updating journal header in %s", 1550fbe5498bSHarvey Harrison __func__); 15511da177e4SLinus Torvalds } 15521da177e4SLinus Torvalds remove_all_from_journal_list(s, jl, 0); 15531da177e4SLinus Torvalds list_del_init(&jl->j_list); 15541da177e4SLinus Torvalds journal->j_num_lists--; 15551da177e4SLinus Torvalds del_from_work_list(s, jl); 15561da177e4SLinus Torvalds 15571da177e4SLinus Torvalds if (journal->j_last_flush_id != 0 && 15581da177e4SLinus Torvalds (jl->j_trans_id - journal->j_last_flush_id) != 1) { 155945b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", 1560bd4c625cSLinus Torvalds journal->j_last_flush_id, jl->j_trans_id); 15611da177e4SLinus Torvalds } 15621da177e4SLinus Torvalds journal->j_last_flush_id = jl->j_trans_id; 15631da177e4SLinus Torvalds 15641da177e4SLinus Torvalds /* not strictly required since we are freeing the list, but it should 15651da177e4SLinus Torvalds * help find code using dead lists later on 15661da177e4SLinus Torvalds */ 15671da177e4SLinus Torvalds jl->j_len = 0; 15681da177e4SLinus Torvalds atomic_set(&(jl->j_nonzerolen), 0); 15691da177e4SLinus Torvalds jl->j_start = 0; 15701da177e4SLinus Torvalds jl->j_realblock = NULL; 15711da177e4SLinus Torvalds jl->j_commit_bh = NULL; 15721da177e4SLinus Torvalds jl->j_trans_id = 0; 15731da177e4SLinus Torvalds jl->j_state = 0; 15741da177e4SLinus Torvalds 
put_journal_list(s, jl); 15751da177e4SLinus Torvalds if (flushall) 1576afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 15771da177e4SLinus Torvalds return err; 15781da177e4SLinus Torvalds } 15791da177e4SLinus Torvalds 15801da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s, 15811da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 15821da177e4SLinus Torvalds struct buffer_chunk *chunk) 15831da177e4SLinus Torvalds { 15841da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 15851da177e4SLinus Torvalds int ret = 0; 15861da177e4SLinus Torvalds 15871da177e4SLinus Torvalds jl->j_state |= LIST_TOUCHED; 15881da177e4SLinus Torvalds del_from_work_list(s, jl); 15891da177e4SLinus Torvalds if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 15901da177e4SLinus Torvalds return 0; 15911da177e4SLinus Torvalds } 15921da177e4SLinus Torvalds 15931da177e4SLinus Torvalds cn = jl->j_realblock; 15941da177e4SLinus Torvalds while (cn) { 15951da177e4SLinus Torvalds /* if the blocknr == 0, this has been cleared from the hash, 15961da177e4SLinus Torvalds ** skip it 15971da177e4SLinus Torvalds */ 15981da177e4SLinus Torvalds if (cn->blocknr == 0) { 15991da177e4SLinus Torvalds goto next; 16001da177e4SLinus Torvalds } 16011da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 16021da177e4SLinus Torvalds struct buffer_head *tmp_bh; 16031da177e4SLinus Torvalds /* we can race against journal_mark_freed when we try 16041da177e4SLinus Torvalds * to lock_buffer(cn->bh), so we have to inc the buffer 16051da177e4SLinus Torvalds * count, and recheck things after locking 16061da177e4SLinus Torvalds */ 16071da177e4SLinus Torvalds tmp_bh = cn->bh; 16081da177e4SLinus Torvalds get_bh(tmp_bh); 16091da177e4SLinus Torvalds lock_buffer(tmp_bh); 16101da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 16111da177e4SLinus Torvalds if (!buffer_journal_dirty(tmp_bh) || 16121da177e4SLinus Torvalds 
buffer_journal_prepared(tmp_bh)) 16131da177e4SLinus Torvalds BUG(); 16141da177e4SLinus Torvalds add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 16151da177e4SLinus Torvalds ret++; 16161da177e4SLinus Torvalds } else { 16171da177e4SLinus Torvalds /* note, cn->bh might be null now */ 16181da177e4SLinus Torvalds unlock_buffer(tmp_bh); 16191da177e4SLinus Torvalds } 16201da177e4SLinus Torvalds put_bh(tmp_bh); 16211da177e4SLinus Torvalds } 16221da177e4SLinus Torvalds next: 16231da177e4SLinus Torvalds cn = cn->next; 16241da177e4SLinus Torvalds cond_resched(); 16251da177e4SLinus Torvalds } 16261da177e4SLinus Torvalds return ret; 16271da177e4SLinus Torvalds } 16281da177e4SLinus Torvalds 16291da177e4SLinus Torvalds /* used by flush_commit_list */ 16301da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 16311da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 16321da177e4SLinus Torvalds { 16331da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 16341da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 16351da177e4SLinus Torvalds int ret = 0; 16361da177e4SLinus Torvalds 16371da177e4SLinus Torvalds jl->j_state |= LIST_DIRTY; 16381da177e4SLinus Torvalds cn = jl->j_realblock; 16391da177e4SLinus Torvalds while (cn) { 16401da177e4SLinus Torvalds /* look for a more recent transaction that logged this 16411da177e4SLinus Torvalds ** buffer. Only the most recent transaction with a buffer in 16421da177e4SLinus Torvalds ** it is allowed to send that buffer to disk 16431da177e4SLinus Torvalds */ 16441da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 1645bd4c625cSLinus Torvalds if (!pjl && cn->blocknr && cn->bh 1646bd4c625cSLinus Torvalds && buffer_journal_dirty(cn->bh)) { 16471da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 16481da177e4SLinus Torvalds /* if the buffer is prepared, it will either be logged 16491da177e4SLinus Torvalds * or restored. 
If restored, we need to make sure 16501da177e4SLinus Torvalds * it actually gets marked dirty 16511da177e4SLinus Torvalds */ 16521da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 16531da177e4SLinus Torvalds if (buffer_journal_prepared(cn->bh)) { 16541da177e4SLinus Torvalds set_buffer_journal_restore_dirty(cn->bh); 16551da177e4SLinus Torvalds } else { 16561da177e4SLinus Torvalds set_buffer_journal_test(cn->bh); 16571da177e4SLinus Torvalds mark_buffer_dirty(cn->bh); 16581da177e4SLinus Torvalds } 16591da177e4SLinus Torvalds } 16601da177e4SLinus Torvalds cn = cn->next; 16611da177e4SLinus Torvalds } 16621da177e4SLinus Torvalds return ret; 16631da177e4SLinus Torvalds } 16641da177e4SLinus Torvalds 16651da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s, 16661da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 16671da177e4SLinus Torvalds struct reiserfs_journal_list **next_jl, 1668600ed416SJeff Mahoney unsigned int *next_trans_id, 1669bd4c625cSLinus Torvalds int num_blocks, int num_trans) 1670bd4c625cSLinus Torvalds { 16711da177e4SLinus Torvalds int ret = 0; 16721da177e4SLinus Torvalds int written = 0; 16731da177e4SLinus Torvalds int transactions_flushed = 0; 1674600ed416SJeff Mahoney unsigned int orig_trans_id = jl->j_trans_id; 16751da177e4SLinus Torvalds struct buffer_chunk chunk; 16761da177e4SLinus Torvalds struct list_head *entry; 16771da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 16781da177e4SLinus Torvalds chunk.nr = 0; 16791da177e4SLinus Torvalds 1680a412f9efSFrederic Weisbecker reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); 16811da177e4SLinus Torvalds if (!journal_list_still_alive(s, orig_trans_id)) { 16821da177e4SLinus Torvalds goto done; 16831da177e4SLinus Torvalds } 16841da177e4SLinus Torvalds 1685afe70259SJeff Mahoney /* we've got j_flush_mutex held, nobody is going to delete any 16861da177e4SLinus Torvalds * of these lists out from underneath us 16871da177e4SLinus Torvalds */ 
16881da177e4SLinus Torvalds while ((num_trans && transactions_flushed < num_trans) || 16891da177e4SLinus Torvalds (!num_trans && written < num_blocks)) { 16901da177e4SLinus Torvalds 16911da177e4SLinus Torvalds if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1692bd4c625cSLinus Torvalds atomic_read(&jl->j_commit_left) 1693bd4c625cSLinus Torvalds || !(jl->j_state & LIST_DIRTY)) { 16941da177e4SLinus Torvalds del_from_work_list(s, jl); 16951da177e4SLinus Torvalds break; 16961da177e4SLinus Torvalds } 16971da177e4SLinus Torvalds ret = write_one_transaction(s, jl, &chunk); 16981da177e4SLinus Torvalds 16991da177e4SLinus Torvalds if (ret < 0) 17001da177e4SLinus Torvalds goto done; 17011da177e4SLinus Torvalds transactions_flushed++; 17021da177e4SLinus Torvalds written += ret; 17031da177e4SLinus Torvalds entry = jl->j_list.next; 17041da177e4SLinus Torvalds 17051da177e4SLinus Torvalds /* did we wrap? */ 17061da177e4SLinus Torvalds if (entry == &journal->j_journal_list) { 17071da177e4SLinus Torvalds break; 17081da177e4SLinus Torvalds } 17091da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 17101da177e4SLinus Torvalds 17111da177e4SLinus Torvalds /* don't bother with older transactions */ 17121da177e4SLinus Torvalds if (jl->j_trans_id <= orig_trans_id) 17131da177e4SLinus Torvalds break; 17141da177e4SLinus Torvalds } 17151da177e4SLinus Torvalds if (chunk.nr) { 17161da177e4SLinus Torvalds write_chunk(&chunk); 17171da177e4SLinus Torvalds } 17181da177e4SLinus Torvalds 17191da177e4SLinus Torvalds done: 1720afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 17211da177e4SLinus Torvalds return ret; 17221da177e4SLinus Torvalds } 17231da177e4SLinus Torvalds 17241da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use 17251da177e4SLinus Torvalds ** all the journa list slots with tiny transactions. 
These 17261da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which 17271da177e4SLinus Torvalds ** adds seeks and slows things down. 17281da177e4SLinus Torvalds ** 17291da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists 17301da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal 17311da177e4SLinus Torvalds ** list updates the header block 17321da177e4SLinus Torvalds */ 17331da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s, 1734bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 1735bd4c625cSLinus Torvalds { 17361da177e4SLinus Torvalds unsigned long len = 0; 17371da177e4SLinus Torvalds unsigned long cur_len; 17381da177e4SLinus Torvalds int ret; 17391da177e4SLinus Torvalds int i; 17401da177e4SLinus Torvalds int limit = 256; 17411da177e4SLinus Torvalds struct reiserfs_journal_list *tjl; 17421da177e4SLinus Torvalds struct reiserfs_journal_list *flush_jl; 1743600ed416SJeff Mahoney unsigned int trans_id; 17441da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 17451da177e4SLinus Torvalds 17461da177e4SLinus Torvalds flush_jl = tjl = jl; 17471da177e4SLinus Torvalds 17481da177e4SLinus Torvalds /* in data logging mode, try harder to flush a lot of blocks */ 17491da177e4SLinus Torvalds if (reiserfs_data_log(s)) 17501da177e4SLinus Torvalds limit = 1024; 17511da177e4SLinus Torvalds /* flush for 256 transactions or limit blocks, whichever comes first */ 17521da177e4SLinus Torvalds for (i = 0; i < 256 && len < limit; i++) { 17531da177e4SLinus Torvalds if (atomic_read(&tjl->j_commit_left) || 17541da177e4SLinus Torvalds tjl->j_trans_id < jl->j_trans_id) { 17551da177e4SLinus Torvalds break; 17561da177e4SLinus Torvalds } 17571da177e4SLinus Torvalds cur_len = atomic_read(&tjl->j_nonzerolen); 17581da177e4SLinus Torvalds if (cur_len > 0) { 17591da177e4SLinus Torvalds tjl->j_state &= ~LIST_TOUCHED; 
17601da177e4SLinus Torvalds } 17611da177e4SLinus Torvalds len += cur_len; 17621da177e4SLinus Torvalds flush_jl = tjl; 17631da177e4SLinus Torvalds if (tjl->j_list.next == &journal->j_journal_list) 17641da177e4SLinus Torvalds break; 17651da177e4SLinus Torvalds tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 17661da177e4SLinus Torvalds } 1767721a769cSJeff Mahoney get_journal_list(jl); 1768721a769cSJeff Mahoney get_journal_list(flush_jl); 17691da177e4SLinus Torvalds /* try to find a group of blocks we can flush across all the 17701da177e4SLinus Torvalds ** transactions, but only bother if we've actually spanned 17711da177e4SLinus Torvalds ** across multiple lists 17721da177e4SLinus Torvalds */ 17731da177e4SLinus Torvalds if (flush_jl != jl) { 17741da177e4SLinus Torvalds ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 17751da177e4SLinus Torvalds } 17761da177e4SLinus Torvalds flush_journal_list(s, flush_jl, 1); 1777721a769cSJeff Mahoney put_journal_list(s, flush_jl); 1778721a769cSJeff Mahoney put_journal_list(s, jl); 17791da177e4SLinus Torvalds return 0; 17801da177e4SLinus Torvalds } 17811da177e4SLinus Torvalds 17821da177e4SLinus Torvalds /* 17831da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh. 17841da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers. 
17851da177e4SLinus Torvalds */ 17861da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb, 17871da177e4SLinus Torvalds struct reiserfs_journal_cnode **table, 17881da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 17891da177e4SLinus Torvalds unsigned long block, int remove_freed) 17901da177e4SLinus Torvalds { 17911da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur; 17921da177e4SLinus Torvalds struct reiserfs_journal_cnode **head; 17931da177e4SLinus Torvalds 17941da177e4SLinus Torvalds head = &(journal_hash(table, sb, block)); 17951da177e4SLinus Torvalds if (!head) { 17961da177e4SLinus Torvalds return; 17971da177e4SLinus Torvalds } 17981da177e4SLinus Torvalds cur = *head; 17991da177e4SLinus Torvalds while (cur) { 1800bd4c625cSLinus Torvalds if (cur->blocknr == block && cur->sb == sb 1801bd4c625cSLinus Torvalds && (jl == NULL || jl == cur->jlist) 1802bd4c625cSLinus Torvalds && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 18031da177e4SLinus Torvalds if (cur->hnext) { 18041da177e4SLinus Torvalds cur->hnext->hprev = cur->hprev; 18051da177e4SLinus Torvalds } 18061da177e4SLinus Torvalds if (cur->hprev) { 18071da177e4SLinus Torvalds cur->hprev->hnext = cur->hnext; 18081da177e4SLinus Torvalds } else { 18091da177e4SLinus Torvalds *head = cur->hnext; 18101da177e4SLinus Torvalds } 18111da177e4SLinus Torvalds cur->blocknr = 0; 18121da177e4SLinus Torvalds cur->sb = NULL; 18131da177e4SLinus Torvalds cur->state = 0; 18141da177e4SLinus Torvalds if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 18151da177e4SLinus Torvalds atomic_dec(&(cur->jlist->j_nonzerolen)); 18161da177e4SLinus Torvalds cur->bh = NULL; 18171da177e4SLinus Torvalds cur->jlist = NULL; 18181da177e4SLinus Torvalds } 18191da177e4SLinus Torvalds cur = cur->hnext; 18201da177e4SLinus Torvalds } 18211da177e4SLinus Torvalds } 18221da177e4SLinus Torvalds 1823a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb) 
1824bd4c625cSLinus Torvalds { 1825a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1826d739b42bSPekka Enberg kfree(journal->j_current_jl); 18271da177e4SLinus Torvalds journal->j_num_lists--; 18281da177e4SLinus Torvalds 18291da177e4SLinus Torvalds vfree(journal->j_cnode_free_orig); 1830a9dd3643SJeff Mahoney free_list_bitmaps(sb, journal->j_list_bitmap); 1831a9dd3643SJeff Mahoney free_bitmap_nodes(sb); /* must be after free_list_bitmaps */ 18321da177e4SLinus Torvalds if (journal->j_header_bh) { 18331da177e4SLinus Torvalds brelse(journal->j_header_bh); 18341da177e4SLinus Torvalds } 18351da177e4SLinus Torvalds /* j_header_bh is on the journal dev, make sure not to release the journal 18361da177e4SLinus Torvalds * dev until we brelse j_header_bh 18371da177e4SLinus Torvalds */ 1838a9dd3643SJeff Mahoney release_journal_dev(sb, journal); 18391da177e4SLinus Torvalds vfree(journal); 18401da177e4SLinus Torvalds } 18411da177e4SLinus Torvalds 18421da177e4SLinus Torvalds /* 18431da177e4SLinus Torvalds ** call on unmount. Only set error to 1 if you haven't made your way out 18441da177e4SLinus Torvalds ** of read_super() yet. Any other caller must keep error at 0. 
18451da177e4SLinus Torvalds */ 1846bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th, 1847a9dd3643SJeff Mahoney struct super_block *sb, int error) 1848bd4c625cSLinus Torvalds { 18491da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 18501da177e4SLinus Torvalds int flushed = 0; 1851a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 18521da177e4SLinus Torvalds 18531da177e4SLinus Torvalds /* we only want to flush out transactions if we were called with error == 0 18541da177e4SLinus Torvalds */ 1855a9dd3643SJeff Mahoney if (!error && !(sb->s_flags & MS_RDONLY)) { 18561da177e4SLinus Torvalds /* end the current trans */ 18571da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 1858a9dd3643SJeff Mahoney do_journal_end(th, sb, 10, FLUSH_ALL); 18591da177e4SLinus Torvalds 18601da177e4SLinus Torvalds /* make sure something gets logged to force our way into the flush code */ 1861a9dd3643SJeff Mahoney if (!journal_join(&myth, sb, 1)) { 1862a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 1863a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 1864bd4c625cSLinus Torvalds 1); 1865a9dd3643SJeff Mahoney journal_mark_dirty(&myth, sb, 1866a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 1867a9dd3643SJeff Mahoney do_journal_end(&myth, sb, 1, FLUSH_ALL); 18681da177e4SLinus Torvalds flushed = 1; 18691da177e4SLinus Torvalds } 18701da177e4SLinus Torvalds } 18711da177e4SLinus Torvalds 18721da177e4SLinus Torvalds /* this also catches errors during the do_journal_end above */ 18731da177e4SLinus Torvalds if (!error && reiserfs_is_journal_aborted(journal)) { 18741da177e4SLinus Torvalds memset(&myth, 0, sizeof(myth)); 1875a9dd3643SJeff Mahoney if (!journal_join_abort(&myth, sb, 1)) { 1876a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 1877a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 1878bd4c625cSLinus Torvalds 1); 1879a9dd3643SJeff Mahoney journal_mark_dirty(&myth, sb, 1880a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 
1881a9dd3643SJeff Mahoney do_journal_end(&myth, sb, 1, FLUSH_ALL); 18821da177e4SLinus Torvalds } 18831da177e4SLinus Torvalds } 18841da177e4SLinus Torvalds 18851da177e4SLinus Torvalds reiserfs_mounted_fs_count--; 18861da177e4SLinus Torvalds /* wait for all commits to finish */ 1887a9dd3643SJeff Mahoney cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 18888ebc4232SFrederic Weisbecker 18898ebc4232SFrederic Weisbecker /* 18908ebc4232SFrederic Weisbecker * We must release the write lock here because 18918ebc4232SFrederic Weisbecker * the workqueue job (flush_async_commit) needs this lock 18928ebc4232SFrederic Weisbecker */ 18938ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 1894033369d1SArtem Bityutskiy 1895033369d1SArtem Bityutskiy cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); 18961da177e4SLinus Torvalds flush_workqueue(commit_wq); 18978ebc4232SFrederic Weisbecker 18981da177e4SLinus Torvalds if (!reiserfs_mounted_fs_count) { 18991da177e4SLinus Torvalds destroy_workqueue(commit_wq); 19001da177e4SLinus Torvalds commit_wq = NULL; 19011da177e4SLinus Torvalds } 19021da177e4SLinus Torvalds 1903a9dd3643SJeff Mahoney free_journal_ram(sb); 19041da177e4SLinus Torvalds 19050523676dSFrederic Weisbecker reiserfs_write_lock(sb); 19060523676dSFrederic Weisbecker 19071da177e4SLinus Torvalds return 0; 19081da177e4SLinus Torvalds } 19091da177e4SLinus Torvalds 19101da177e4SLinus Torvalds /* 19111da177e4SLinus Torvalds ** call on unmount. flush all journal trans, release all alloc'd ram 19121da177e4SLinus Torvalds */ 1913bd4c625cSLinus Torvalds int journal_release(struct reiserfs_transaction_handle *th, 1914a9dd3643SJeff Mahoney struct super_block *sb) 1915bd4c625cSLinus Torvalds { 1916a9dd3643SJeff Mahoney return do_journal_release(th, sb, 0); 19171da177e4SLinus Torvalds } 1918bd4c625cSLinus Torvalds 19191da177e4SLinus Torvalds /* 19201da177e4SLinus Torvalds ** only call from an error condition inside reiserfs_read_super! 
19211da177e4SLinus Torvalds */ 1922bd4c625cSLinus Torvalds int journal_release_error(struct reiserfs_transaction_handle *th, 1923a9dd3643SJeff Mahoney struct super_block *sb) 1924bd4c625cSLinus Torvalds { 1925a9dd3643SJeff Mahoney return do_journal_release(th, sb, 1); 19261da177e4SLinus Torvalds } 19271da177e4SLinus Torvalds 19281da177e4SLinus Torvalds /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1929a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb, 1930bd4c625cSLinus Torvalds struct reiserfs_journal_desc *desc, 1931bd4c625cSLinus Torvalds struct reiserfs_journal_commit *commit) 1932bd4c625cSLinus Torvalds { 19331da177e4SLinus Torvalds if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 19341da177e4SLinus Torvalds get_commit_trans_len(commit) != get_desc_trans_len(desc) || 1935a9dd3643SJeff Mahoney get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || 1936bd4c625cSLinus Torvalds get_commit_trans_len(commit) <= 0) { 19371da177e4SLinus Torvalds return 1; 19381da177e4SLinus Torvalds } 19391da177e4SLinus Torvalds return 0; 19401da177e4SLinus Torvalds } 1941bd4c625cSLinus Torvalds 19421da177e4SLinus Torvalds /* returns 0 if it did not find a description block 19431da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block 19441da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid 1945278f6679SJeff Mahoney ** NOTE: only called during fs mount 19461da177e4SLinus Torvalds */ 1947a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb, 1948bd4c625cSLinus Torvalds struct buffer_head *d_bh, 1949600ed416SJeff Mahoney unsigned int *oldest_invalid_trans_id, 1950bd4c625cSLinus Torvalds unsigned long *newest_mount_id) 1951bd4c625cSLinus Torvalds { 19521da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 19531da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 19541da177e4SLinus Torvalds struct 
buffer_head *c_bh; 19551da177e4SLinus Torvalds unsigned long offset; 19561da177e4SLinus Torvalds 19571da177e4SLinus Torvalds if (!d_bh) 19581da177e4SLinus Torvalds return 0; 19591da177e4SLinus Torvalds 19601da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 1961bd4c625cSLinus Torvalds if (get_desc_trans_len(desc) > 0 1962bd4c625cSLinus Torvalds && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 1963bd4c625cSLinus Torvalds if (oldest_invalid_trans_id && *oldest_invalid_trans_id 1964bd4c625cSLinus Torvalds && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 1965a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 1966bd4c625cSLinus Torvalds "journal-986: transaction " 19671da177e4SLinus Torvalds "is valid returning because trans_id %d is greater than " 1968bd4c625cSLinus Torvalds "oldest_invalid %lu", 1969bd4c625cSLinus Torvalds get_desc_trans_id(desc), 19701da177e4SLinus Torvalds *oldest_invalid_trans_id); 19711da177e4SLinus Torvalds return 0; 19721da177e4SLinus Torvalds } 1973bd4c625cSLinus Torvalds if (newest_mount_id 1974bd4c625cSLinus Torvalds && *newest_mount_id > get_desc_mount_id(desc)) { 1975a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 1976bd4c625cSLinus Torvalds "journal-1087: transaction " 19771da177e4SLinus Torvalds "is valid returning because mount_id %d is less than " 1978bd4c625cSLinus Torvalds "newest_mount_id %lu", 1979bd4c625cSLinus Torvalds get_desc_mount_id(desc), 19801da177e4SLinus Torvalds *newest_mount_id); 19811da177e4SLinus Torvalds return -1; 19821da177e4SLinus Torvalds } 1983a9dd3643SJeff Mahoney if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) { 1984a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2018", 198545b03d5eSJeff Mahoney "Bad transaction length %d " 198645b03d5eSJeff Mahoney "encountered, ignoring transaction", 1987bd4c625cSLinus Torvalds get_desc_trans_len(desc)); 19881da177e4SLinus Torvalds return -1; 19891da177e4SLinus Torvalds } 
1990a9dd3643SJeff Mahoney offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 19911da177e4SLinus Torvalds 19921da177e4SLinus Torvalds /* ok, we have a journal description block, lets see if the transaction was valid */ 1993bd4c625cSLinus Torvalds c_bh = 1994a9dd3643SJeff Mahoney journal_bread(sb, 1995a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 1996bd4c625cSLinus Torvalds ((offset + get_desc_trans_len(desc) + 1997a9dd3643SJeff Mahoney 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 19981da177e4SLinus Torvalds if (!c_bh) 19991da177e4SLinus Torvalds return 0; 20001da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2001a9dd3643SJeff Mahoney if (journal_compare_desc_commit(sb, desc, commit)) { 2002a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 20031da177e4SLinus Torvalds "journal_transaction_is_valid, commit offset %ld had bad " 20041da177e4SLinus Torvalds "time %d or length %d", 2005bd4c625cSLinus Torvalds c_bh->b_blocknr - 2006a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 20071da177e4SLinus Torvalds get_commit_trans_id(commit), 20081da177e4SLinus Torvalds get_commit_trans_len(commit)); 20091da177e4SLinus Torvalds brelse(c_bh); 20101da177e4SLinus Torvalds if (oldest_invalid_trans_id) { 2011bd4c625cSLinus Torvalds *oldest_invalid_trans_id = 2012bd4c625cSLinus Torvalds get_desc_trans_id(desc); 2013a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2014bd4c625cSLinus Torvalds "journal-1004: " 20151da177e4SLinus Torvalds "transaction_is_valid setting oldest invalid trans_id " 2016bd4c625cSLinus Torvalds "to %d", 2017bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds return -1; 20201da177e4SLinus Torvalds } 20211da177e4SLinus Torvalds brelse(c_bh); 2022a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2023bd4c625cSLinus Torvalds "journal-1006: found valid " 20241da177e4SLinus Torvalds "transaction start offset %llu, len %d id %d", 
2025bd4c625cSLinus Torvalds d_bh->b_blocknr - 2026a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 2027bd4c625cSLinus Torvalds get_desc_trans_len(desc), 2028bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 20291da177e4SLinus Torvalds return 1; 20301da177e4SLinus Torvalds } else { 20311da177e4SLinus Torvalds return 0; 20321da177e4SLinus Torvalds } 20331da177e4SLinus Torvalds } 20341da177e4SLinus Torvalds 2035bd4c625cSLinus Torvalds static void brelse_array(struct buffer_head **heads, int num) 2036bd4c625cSLinus Torvalds { 20371da177e4SLinus Torvalds int i; 20381da177e4SLinus Torvalds for (i = 0; i < num; i++) { 20391da177e4SLinus Torvalds brelse(heads[i]); 20401da177e4SLinus Torvalds } 20411da177e4SLinus Torvalds } 20421da177e4SLinus Torvalds 20431da177e4SLinus Torvalds /* 20441da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions, 2045278f6679SJeff Mahoney ** this either reads in a replays a transaction, or returns because the 2046278f6679SJeff Mahoney ** transaction is invalid, or too old. 
2047278f6679SJeff Mahoney ** NOTE: only called during fs mount 20481da177e4SLinus Torvalds */ 2049a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb, 2050bd4c625cSLinus Torvalds unsigned long cur_dblock, 2051bd4c625cSLinus Torvalds unsigned long oldest_start, 2052600ed416SJeff Mahoney unsigned int oldest_trans_id, 2053bd4c625cSLinus Torvalds unsigned long newest_mount_id) 2054bd4c625cSLinus Torvalds { 2055a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 20561da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 20571da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 2058600ed416SJeff Mahoney unsigned int trans_id = 0; 20591da177e4SLinus Torvalds struct buffer_head *c_bh; 20601da177e4SLinus Torvalds struct buffer_head *d_bh; 20611da177e4SLinus Torvalds struct buffer_head **log_blocks = NULL; 20621da177e4SLinus Torvalds struct buffer_head **real_blocks = NULL; 2063600ed416SJeff Mahoney unsigned int trans_offset; 20641da177e4SLinus Torvalds int i; 20651da177e4SLinus Torvalds int trans_half; 20661da177e4SLinus Torvalds 2067a9dd3643SJeff Mahoney d_bh = journal_bread(sb, cur_dblock); 20681da177e4SLinus Torvalds if (!d_bh) 20691da177e4SLinus Torvalds return 1; 20701da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2071a9dd3643SJeff Mahoney trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2072a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " 20731da177e4SLinus Torvalds "journal_read_transaction, offset %llu, len %d mount_id %d", 2074a9dd3643SJeff Mahoney d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), 20751da177e4SLinus Torvalds get_desc_trans_len(desc), get_desc_mount_id(desc)); 20761da177e4SLinus Torvalds if (get_desc_trans_id(desc) < oldest_trans_id) { 2077a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " 20781da177e4SLinus Torvalds "journal_read_trans skipping because %lu is too old", 
2079bd4c625cSLinus Torvalds cur_dblock - 2080a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 20811da177e4SLinus Torvalds brelse(d_bh); 20821da177e4SLinus Torvalds return 1; 20831da177e4SLinus Torvalds } 20841da177e4SLinus Torvalds if (get_desc_mount_id(desc) != newest_mount_id) { 2085a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " 20861da177e4SLinus Torvalds "journal_read_trans skipping because %d is != " 20871da177e4SLinus Torvalds "newest_mount_id %lu", get_desc_mount_id(desc), 20881da177e4SLinus Torvalds newest_mount_id); 20891da177e4SLinus Torvalds brelse(d_bh); 20901da177e4SLinus Torvalds return 1; 20911da177e4SLinus Torvalds } 2092a9dd3643SJeff Mahoney c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 20931da177e4SLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 1) % 2094a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))); 20951da177e4SLinus Torvalds if (!c_bh) { 20961da177e4SLinus Torvalds brelse(d_bh); 20971da177e4SLinus Torvalds return 1; 20981da177e4SLinus Torvalds } 20991da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2100a9dd3643SJeff Mahoney if (journal_compare_desc_commit(sb, desc, commit)) { 2101a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2102bd4c625cSLinus Torvalds "journal_read_transaction, " 21031da177e4SLinus Torvalds "commit offset %llu had bad time %d or length %d", 2104bd4c625cSLinus Torvalds c_bh->b_blocknr - 2105a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 2106bd4c625cSLinus Torvalds get_commit_trans_id(commit), 2107bd4c625cSLinus Torvalds get_commit_trans_len(commit)); 21081da177e4SLinus Torvalds brelse(c_bh); 21091da177e4SLinus Torvalds brelse(d_bh); 21101da177e4SLinus Torvalds return 1; 21111da177e4SLinus Torvalds } 21123f8b5ee3SJeff Mahoney 21133f8b5ee3SJeff Mahoney if (bdev_read_only(sb->s_bdev)) { 21143f8b5ee3SJeff Mahoney reiserfs_warning(sb, "clm-2076", 21153f8b5ee3SJeff Mahoney "device is readonly, unable to replay 
log"); 21163f8b5ee3SJeff Mahoney brelse(c_bh); 21173f8b5ee3SJeff Mahoney brelse(d_bh); 21183f8b5ee3SJeff Mahoney return -EROFS; 21193f8b5ee3SJeff Mahoney } 21203f8b5ee3SJeff Mahoney 21211da177e4SLinus Torvalds trans_id = get_desc_trans_id(desc); 21221da177e4SLinus Torvalds /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2123d739b42bSPekka Enberg log_blocks = kmalloc(get_desc_trans_len(desc) * 2124d739b42bSPekka Enberg sizeof(struct buffer_head *), GFP_NOFS); 2125d739b42bSPekka Enberg real_blocks = kmalloc(get_desc_trans_len(desc) * 2126d739b42bSPekka Enberg sizeof(struct buffer_head *), GFP_NOFS); 21271da177e4SLinus Torvalds if (!log_blocks || !real_blocks) { 21281da177e4SLinus Torvalds brelse(c_bh); 21291da177e4SLinus Torvalds brelse(d_bh); 2130d739b42bSPekka Enberg kfree(log_blocks); 2131d739b42bSPekka Enberg kfree(real_blocks); 2132a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1169", 213345b03d5eSJeff Mahoney "kmalloc failed, unable to mount FS"); 21341da177e4SLinus Torvalds return -1; 21351da177e4SLinus Torvalds } 21361da177e4SLinus Torvalds /* get all the buffer heads */ 2137a9dd3643SJeff Mahoney trans_half = journal_trans_half(sb->s_blocksize); 21381da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 2139bd4c625cSLinus Torvalds log_blocks[i] = 2140a9dd3643SJeff Mahoney journal_getblk(sb, 2141a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2142bd4c625cSLinus Torvalds (trans_offset + 1 + 2143a9dd3643SJeff Mahoney i) % SB_ONDISK_JOURNAL_SIZE(sb)); 21441da177e4SLinus Torvalds if (i < trans_half) { 2145bd4c625cSLinus Torvalds real_blocks[i] = 2146a9dd3643SJeff Mahoney sb_getblk(sb, 2147bd4c625cSLinus Torvalds le32_to_cpu(desc->j_realblock[i])); 21481da177e4SLinus Torvalds } else { 2149bd4c625cSLinus Torvalds real_blocks[i] = 2150a9dd3643SJeff Mahoney sb_getblk(sb, 2151bd4c625cSLinus Torvalds le32_to_cpu(commit-> 2152bd4c625cSLinus Torvalds j_realblock[i - trans_half])); 21531da177e4SLinus 
Torvalds } 2154a9dd3643SJeff Mahoney if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) { 2155a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1207", 215645b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! " 215745b03d5eSJeff Mahoney "Block to replay is outside of " 215845b03d5eSJeff Mahoney "filesystem"); 21591da177e4SLinus Torvalds goto abort_replay; 21601da177e4SLinus Torvalds } 21611da177e4SLinus Torvalds /* make sure we don't try to replay onto log or reserved area */ 2162bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 2163a9dd3643SJeff Mahoney (sb, real_blocks[i]->b_blocknr)) { 2164a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1204", 216545b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! " 216645b03d5eSJeff Mahoney "Trying to replay onto a log block"); 21671da177e4SLinus Torvalds abort_replay: 21681da177e4SLinus Torvalds brelse_array(log_blocks, i); 21691da177e4SLinus Torvalds brelse_array(real_blocks, i); 21701da177e4SLinus Torvalds brelse(c_bh); 21711da177e4SLinus Torvalds brelse(d_bh); 2172d739b42bSPekka Enberg kfree(log_blocks); 2173d739b42bSPekka Enberg kfree(real_blocks); 21741da177e4SLinus Torvalds return -1; 21751da177e4SLinus Torvalds } 21761da177e4SLinus Torvalds } 21771da177e4SLinus Torvalds /* read in the log blocks, memcpy to the corresponding real block */ 21781da177e4SLinus Torvalds ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 21791da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 21808ebc4232SFrederic Weisbecker 21811da177e4SLinus Torvalds wait_on_buffer(log_blocks[i]); 21821da177e4SLinus Torvalds if (!buffer_uptodate(log_blocks[i])) { 2183a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1212", 218445b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! 
" 218545b03d5eSJeff Mahoney "buffer write failed"); 2186bd4c625cSLinus Torvalds brelse_array(log_blocks + i, 2187bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 21881da177e4SLinus Torvalds brelse_array(real_blocks, get_desc_trans_len(desc)); 21891da177e4SLinus Torvalds brelse(c_bh); 21901da177e4SLinus Torvalds brelse(d_bh); 2191d739b42bSPekka Enberg kfree(log_blocks); 2192d739b42bSPekka Enberg kfree(real_blocks); 21931da177e4SLinus Torvalds return -1; 21941da177e4SLinus Torvalds } 2195bd4c625cSLinus Torvalds memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, 2196bd4c625cSLinus Torvalds real_blocks[i]->b_size); 21971da177e4SLinus Torvalds set_buffer_uptodate(real_blocks[i]); 21981da177e4SLinus Torvalds brelse(log_blocks[i]); 21991da177e4SLinus Torvalds } 22001da177e4SLinus Torvalds /* flush out the real blocks */ 22011da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 22021da177e4SLinus Torvalds set_buffer_dirty(real_blocks[i]); 22039cb569d6SChristoph Hellwig write_dirty_buffer(real_blocks[i], WRITE); 22041da177e4SLinus Torvalds } 22051da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 22061da177e4SLinus Torvalds wait_on_buffer(real_blocks[i]); 22071da177e4SLinus Torvalds if (!buffer_uptodate(real_blocks[i])) { 2208a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1226", 220945b03d5eSJeff Mahoney "REPLAY FAILURE, fsck required! 
" 221045b03d5eSJeff Mahoney "buffer write failed"); 2211bd4c625cSLinus Torvalds brelse_array(real_blocks + i, 2212bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 22131da177e4SLinus Torvalds brelse(c_bh); 22141da177e4SLinus Torvalds brelse(d_bh); 2215d739b42bSPekka Enberg kfree(log_blocks); 2216d739b42bSPekka Enberg kfree(real_blocks); 22171da177e4SLinus Torvalds return -1; 22181da177e4SLinus Torvalds } 22191da177e4SLinus Torvalds brelse(real_blocks[i]); 22201da177e4SLinus Torvalds } 2221bd4c625cSLinus Torvalds cur_dblock = 2222a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2223bd4c625cSLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 2224a9dd3643SJeff Mahoney 2) % SB_ONDISK_JOURNAL_SIZE(sb)); 2225a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2226bd4c625cSLinus Torvalds "journal-1095: setting journal " "start to offset %ld", 2227a9dd3643SJeff Mahoney cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 22281da177e4SLinus Torvalds 22291da177e4SLinus Torvalds /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2230a9dd3643SJeff Mahoney journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 22311da177e4SLinus Torvalds journal->j_last_flush_trans_id = trans_id; 22321da177e4SLinus Torvalds journal->j_trans_id = trans_id + 1; 2233a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2234a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2235a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 22361da177e4SLinus Torvalds brelse(c_bh); 22371da177e4SLinus Torvalds brelse(d_bh); 2238d739b42bSPekka Enberg kfree(log_blocks); 2239d739b42bSPekka Enberg kfree(real_blocks); 22401da177e4SLinus Torvalds return 0; 22411da177e4SLinus Torvalds } 22421da177e4SLinus Torvalds 22431da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize 22441da177e4SLinus Torvalds size (but no more than BUFNR blocks at a time). 
This proved to improve 22451da177e4SLinus Torvalds mounting speed on self-rebuilding raid5 arrays at least. 22461da177e4SLinus Torvalds Right now it is only used from journal code. But later we might use it 22471da177e4SLinus Torvalds from other places. 22481da177e4SLinus Torvalds Note: Do not use journal_getblk/sb_getblk functions here! */ 22493ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev, 22503ee16670SJeff Mahoney b_blocknr_t block, int bufsize, 22513ee16670SJeff Mahoney b_blocknr_t max_block) 22521da177e4SLinus Torvalds { 22531da177e4SLinus Torvalds struct buffer_head *bhlist[BUFNR]; 22541da177e4SLinus Torvalds unsigned int blocks = BUFNR; 22551da177e4SLinus Torvalds struct buffer_head *bh; 22561da177e4SLinus Torvalds int i, j; 22571da177e4SLinus Torvalds 22581da177e4SLinus Torvalds bh = __getblk(dev, block, bufsize); 22591da177e4SLinus Torvalds if (buffer_uptodate(bh)) 22601da177e4SLinus Torvalds return (bh); 22611da177e4SLinus Torvalds 22621da177e4SLinus Torvalds if (block + BUFNR > max_block) { 22631da177e4SLinus Torvalds blocks = max_block - block; 22641da177e4SLinus Torvalds } 22651da177e4SLinus Torvalds bhlist[0] = bh; 22661da177e4SLinus Torvalds j = 1; 22671da177e4SLinus Torvalds for (i = 1; i < blocks; i++) { 22681da177e4SLinus Torvalds bh = __getblk(dev, block + i, bufsize); 22691da177e4SLinus Torvalds if (buffer_uptodate(bh)) { 22701da177e4SLinus Torvalds brelse(bh); 22711da177e4SLinus Torvalds break; 2272bd4c625cSLinus Torvalds } else 2273bd4c625cSLinus Torvalds bhlist[j++] = bh; 22741da177e4SLinus Torvalds } 22751da177e4SLinus Torvalds ll_rw_block(READ, j, bhlist); 22761da177e4SLinus Torvalds for (i = 1; i < j; i++) 22771da177e4SLinus Torvalds brelse(bhlist[i]); 22781da177e4SLinus Torvalds bh = bhlist[0]; 22791da177e4SLinus Torvalds wait_on_buffer(bh); 22801da177e4SLinus Torvalds if (buffer_uptodate(bh)) 22811da177e4SLinus Torvalds return bh; 22821da177e4SLinus Torvalds brelse(bh); 22831da177e4SLinus 
Torvalds return NULL; 22841da177e4SLinus Torvalds } 22851da177e4SLinus Torvalds 22861da177e4SLinus Torvalds /* 22871da177e4SLinus Torvalds ** read and replay the log 2288278f6679SJeff Mahoney ** on a clean unmount, the journal header's next unflushed pointer will 2289278f6679SJeff Mahoney ** be to an invalid transaction. This tests that before finding all the 2290278f6679SJeff Mahoney ** transactions in the log, which makes normal mount times fast. 2291278f6679SJeff Mahoney ** After a crash, this starts with the next unflushed transaction, and 2292278f6679SJeff Mahoney ** replays until it finds one too old, or invalid. 22931da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly. 2294278f6679SJeff Mahoney ** NOTE: only called during fs mount 22951da177e4SLinus Torvalds */ 2296a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb) 2297bd4c625cSLinus Torvalds { 2298a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 22991da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 2300600ed416SJeff Mahoney unsigned int oldest_trans_id = 0; 2301600ed416SJeff Mahoney unsigned int oldest_invalid_trans_id = 0; 23021da177e4SLinus Torvalds time_t start; 23031da177e4SLinus Torvalds unsigned long oldest_start = 0; 23041da177e4SLinus Torvalds unsigned long cur_dblock = 0; 23051da177e4SLinus Torvalds unsigned long newest_mount_id = 9; 23061da177e4SLinus Torvalds struct buffer_head *d_bh; 23071da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 23081da177e4SLinus Torvalds int valid_journal_header = 0; 23091da177e4SLinus Torvalds int replay_count = 0; 23101da177e4SLinus Torvalds int continue_replay = 1; 23111da177e4SLinus Torvalds int ret; 23121da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 23131da177e4SLinus Torvalds 2314a9dd3643SJeff Mahoney cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2315a9dd3643SJeff Mahoney reiserfs_info(sb, "checking transaction log (%s)\n", 23161da177e4SLinus Torvalds 
bdevname(journal->j_dev_bd, b)); 23171da177e4SLinus Torvalds start = get_seconds(); 23181da177e4SLinus Torvalds 23191da177e4SLinus Torvalds /* step 1, read in the journal header block. Check the transaction it says 23201da177e4SLinus Torvalds ** is the first unflushed, and if that transaction is not valid, 23211da177e4SLinus Torvalds ** replay is done 23221da177e4SLinus Torvalds */ 2323a9dd3643SJeff Mahoney journal->j_header_bh = journal_bread(sb, 2324a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) 2325a9dd3643SJeff Mahoney + SB_ONDISK_JOURNAL_SIZE(sb)); 23261da177e4SLinus Torvalds if (!journal->j_header_bh) { 23271da177e4SLinus Torvalds return 1; 23281da177e4SLinus Torvalds } 23291da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2330c499ec24SVladimir V. Saveliev if (le32_to_cpu(jh->j_first_unflushed_offset) < 2331a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) 2332bd4c625cSLinus Torvalds && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2333bd4c625cSLinus Torvalds oldest_start = 2334a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 23351da177e4SLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset); 23361da177e4SLinus Torvalds oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 23371da177e4SLinus Torvalds newest_mount_id = le32_to_cpu(jh->j_mount_id); 2338a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2339bd4c625cSLinus Torvalds "journal-1153: found in " 23401da177e4SLinus Torvalds "header: first_unflushed_offset %d, last_flushed_trans_id " 23411da177e4SLinus Torvalds "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 23421da177e4SLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id)); 23431da177e4SLinus Torvalds valid_journal_header = 1; 23441da177e4SLinus Torvalds 23451da177e4SLinus Torvalds /* now, we try to read the first unflushed offset. 
If it is not valid, 23461da177e4SLinus Torvalds ** there is nothing more we can do, and it makes no sense to read 23471da177e4SLinus Torvalds ** through the whole log. 23481da177e4SLinus Torvalds */ 2349bd4c625cSLinus Torvalds d_bh = 2350a9dd3643SJeff Mahoney journal_bread(sb, 2351a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2352bd4c625cSLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset)); 2353a9dd3643SJeff Mahoney ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); 23541da177e4SLinus Torvalds if (!ret) { 23551da177e4SLinus Torvalds continue_replay = 0; 23561da177e4SLinus Torvalds } 23571da177e4SLinus Torvalds brelse(d_bh); 23581da177e4SLinus Torvalds goto start_log_replay; 23591da177e4SLinus Torvalds } 23601da177e4SLinus Torvalds 23611da177e4SLinus Torvalds /* ok, there are transactions that need to be replayed. start with the first log block, find 23621da177e4SLinus Torvalds ** all the valid transactions, and pick out the oldest. 23631da177e4SLinus Torvalds */ 2364bd4c625cSLinus Torvalds while (continue_replay 2365bd4c625cSLinus Torvalds && cur_dblock < 2366a9dd3643SJeff Mahoney (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2367a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))) { 23681da177e4SLinus Torvalds /* Note that it is required for blocksize of primary fs device and journal 23691da177e4SLinus Torvalds device to be the same */ 2370bd4c625cSLinus Torvalds d_bh = 2371bd4c625cSLinus Torvalds reiserfs_breada(journal->j_dev_bd, cur_dblock, 2372a9dd3643SJeff Mahoney sb->s_blocksize, 2373a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2374a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)); 2375bd4c625cSLinus Torvalds ret = 2376a9dd3643SJeff Mahoney journal_transaction_is_valid(sb, d_bh, 2377bd4c625cSLinus Torvalds &oldest_invalid_trans_id, 2378bd4c625cSLinus Torvalds &newest_mount_id); 23791da177e4SLinus Torvalds if (ret == 1) { 23801da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 23811da177e4SLinus Torvalds if 
(oldest_start == 0) { /* init all oldest_ values */ 23821da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 23831da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 23841da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2385a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2386bd4c625cSLinus Torvalds "journal-1179: Setting " 23871da177e4SLinus Torvalds "oldest_start to offset %llu, trans_id %lu", 2388bd4c625cSLinus Torvalds oldest_start - 2389bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2390a9dd3643SJeff Mahoney (sb), oldest_trans_id); 23911da177e4SLinus Torvalds } else if (oldest_trans_id > get_desc_trans_id(desc)) { 23921da177e4SLinus Torvalds /* one we just read was older */ 23931da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 23941da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 2395a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2396bd4c625cSLinus Torvalds "journal-1180: Resetting " 23971da177e4SLinus Torvalds "oldest_start to offset %lu, trans_id %lu", 2398bd4c625cSLinus Torvalds oldest_start - 2399bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2400a9dd3643SJeff Mahoney (sb), oldest_trans_id); 24011da177e4SLinus Torvalds } 24021da177e4SLinus Torvalds if (newest_mount_id < get_desc_mount_id(desc)) { 24031da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2404a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2405bd4c625cSLinus Torvalds "journal-1299: Setting " 2406bd4c625cSLinus Torvalds "newest_mount_id to %d", 2407bd4c625cSLinus Torvalds get_desc_mount_id(desc)); 24081da177e4SLinus Torvalds } 24091da177e4SLinus Torvalds cur_dblock += get_desc_trans_len(desc) + 2; 24101da177e4SLinus Torvalds } else { 24111da177e4SLinus Torvalds cur_dblock++; 24121da177e4SLinus Torvalds } 24131da177e4SLinus Torvalds brelse(d_bh); 24141da177e4SLinus Torvalds } 24151da177e4SLinus Torvalds 24161da177e4SLinus Torvalds start_log_replay: 24171da177e4SLinus 
Torvalds cur_dblock = oldest_start; 24181da177e4SLinus Torvalds if (oldest_trans_id) { 2419a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2420bd4c625cSLinus Torvalds "journal-1206: Starting replay " 24211da177e4SLinus Torvalds "from offset %llu, trans_id %lu", 2422a9dd3643SJeff Mahoney cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), 24231da177e4SLinus Torvalds oldest_trans_id); 24241da177e4SLinus Torvalds 24251da177e4SLinus Torvalds } 24261da177e4SLinus Torvalds replay_count = 0; 24271da177e4SLinus Torvalds while (continue_replay && oldest_trans_id > 0) { 2428bd4c625cSLinus Torvalds ret = 2429a9dd3643SJeff Mahoney journal_read_transaction(sb, cur_dblock, oldest_start, 2430bd4c625cSLinus Torvalds oldest_trans_id, newest_mount_id); 24311da177e4SLinus Torvalds if (ret < 0) { 24321da177e4SLinus Torvalds return ret; 24331da177e4SLinus Torvalds } else if (ret != 0) { 24341da177e4SLinus Torvalds break; 24351da177e4SLinus Torvalds } 2436bd4c625cSLinus Torvalds cur_dblock = 2437a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; 24381da177e4SLinus Torvalds replay_count++; 24391da177e4SLinus Torvalds if (cur_dblock == oldest_start) 24401da177e4SLinus Torvalds break; 24411da177e4SLinus Torvalds } 24421da177e4SLinus Torvalds 24431da177e4SLinus Torvalds if (oldest_trans_id == 0) { 2444a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2445bd4c625cSLinus Torvalds "journal-1225: No valid " "transactions found"); 24461da177e4SLinus Torvalds } 24471da177e4SLinus Torvalds /* j_start does not get set correctly if we don't replay any transactions. 
24481da177e4SLinus Torvalds ** if we had a valid journal_header, set j_start to the first unflushed transaction value, 24491da177e4SLinus Torvalds ** copy the trans_id from the header 24501da177e4SLinus Torvalds */ 24511da177e4SLinus Torvalds if (valid_journal_header && replay_count == 0) { 24521da177e4SLinus Torvalds journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2453bd4c625cSLinus Torvalds journal->j_trans_id = 2454bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2455a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2456a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2457a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 2458bd4c625cSLinus Torvalds journal->j_last_flush_trans_id = 2459bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id); 24601da177e4SLinus Torvalds journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; 24611da177e4SLinus Torvalds } else { 24621da177e4SLinus Torvalds journal->j_mount_id = newest_mount_id + 1; 24631da177e4SLinus Torvalds } 2464a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 24651da177e4SLinus Torvalds "newest_mount_id to %lu", journal->j_mount_id); 24661da177e4SLinus Torvalds journal->j_first_unflushed_offset = journal->j_start; 24671da177e4SLinus Torvalds if (replay_count > 0) { 2468a9dd3643SJeff Mahoney reiserfs_info(sb, 2469bd4c625cSLinus Torvalds "replayed %d transactions in %lu seconds\n", 24701da177e4SLinus Torvalds replay_count, get_seconds() - start); 24711da177e4SLinus Torvalds } 2472278f6679SJeff Mahoney /* needed to satisfy the locking in _update_journal_header_block */ 2473278f6679SJeff Mahoney reiserfs_write_lock(sb); 2474a9dd3643SJeff Mahoney if (!bdev_read_only(sb->s_bdev) && 2475a9dd3643SJeff Mahoney _update_journal_header_block(sb, journal->j_start, 2476bd4c625cSLinus Torvalds journal->j_last_flush_trans_id)) { 2477278f6679SJeff Mahoney reiserfs_write_unlock(sb); 24781da177e4SLinus Torvalds /* 
replay failed, caller must call free_journal_ram and abort 24791da177e4SLinus Torvalds ** the mount 24801da177e4SLinus Torvalds */ 24811da177e4SLinus Torvalds return -1; 24821da177e4SLinus Torvalds } 2483278f6679SJeff Mahoney reiserfs_write_unlock(sb); 24841da177e4SLinus Torvalds return 0; 24851da177e4SLinus Torvalds } 24861da177e4SLinus Torvalds 24871da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 24881da177e4SLinus Torvalds { 24891da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 24908c777cc4SPekka Enberg jl = kzalloc(sizeof(struct reiserfs_journal_list), 24918c777cc4SPekka Enberg GFP_NOFS | __GFP_NOFAIL); 24921da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_list); 24931da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_working_list); 24941da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_tail_bh_list); 24951da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_bh_list); 249690415deaSJeff Mahoney mutex_init(&jl->j_commit_mutex); 24971da177e4SLinus Torvalds SB_JOURNAL(s)->j_num_lists++; 24981da177e4SLinus Torvalds get_journal_list(jl); 24991da177e4SLinus Torvalds return jl; 25001da177e4SLinus Torvalds } 25011da177e4SLinus Torvalds 2502a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb) 2503bd4c625cSLinus Torvalds { 2504a9dd3643SJeff Mahoney SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); 25051da177e4SLinus Torvalds } 25061da177e4SLinus Torvalds 25074385bab1SAl Viro static void release_journal_dev(struct super_block *super, 25081da177e4SLinus Torvalds struct reiserfs_journal *journal) 25091da177e4SLinus Torvalds { 251086098fa0SChristoph Hellwig if (journal->j_dev_bd != NULL) { 25114385bab1SAl Viro blkdev_put(journal->j_dev_bd, journal->j_dev_mode); 25121da177e4SLinus Torvalds journal->j_dev_bd = NULL; 25131da177e4SLinus Torvalds } 25141da177e4SLinus Torvalds } 25151da177e4SLinus Torvalds 25161da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super, 25171da177e4SLinus Torvalds 
struct reiserfs_journal *journal, 25181da177e4SLinus Torvalds const char *jdev_name) 25191da177e4SLinus Torvalds { 25201da177e4SLinus Torvalds int result; 25211da177e4SLinus Torvalds dev_t jdev; 2522e525fd89STejun Heo fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; 25231da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 25241da177e4SLinus Torvalds 25251da177e4SLinus Torvalds result = 0; 25261da177e4SLinus Torvalds 25271da177e4SLinus Torvalds journal->j_dev_bd = NULL; 25281da177e4SLinus Torvalds jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 25291da177e4SLinus Torvalds new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 25301da177e4SLinus Torvalds 25311da177e4SLinus Torvalds if (bdev_read_only(super->s_bdev)) 25321da177e4SLinus Torvalds blkdev_mode = FMODE_READ; 25331da177e4SLinus Torvalds 25341da177e4SLinus Torvalds /* there is no "jdev" option and journal is on separate device */ 25351da177e4SLinus Torvalds if ((!jdev_name || !jdev_name[0])) { 2536e525fd89STejun Heo if (jdev == super->s_dev) 2537e525fd89STejun Heo blkdev_mode &= ~FMODE_EXCL; 2538d4d77629STejun Heo journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, 2539d4d77629STejun Heo journal); 2540e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 25411da177e4SLinus Torvalds if (IS_ERR(journal->j_dev_bd)) { 25421da177e4SLinus Torvalds result = PTR_ERR(journal->j_dev_bd); 25431da177e4SLinus Torvalds journal->j_dev_bd = NULL; 254445b03d5eSJeff Mahoney reiserfs_warning(super, "sh-458", 25451da177e4SLinus Torvalds "cannot init journal device '%s': %i", 25461da177e4SLinus Torvalds __bdevname(jdev, b), result); 25471da177e4SLinus Torvalds return result; 2548e525fd89STejun Heo } else if (jdev != super->s_dev) 25491da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 255086098fa0SChristoph Hellwig 25511da177e4SLinus Torvalds return 0; 25521da177e4SLinus Torvalds } 25531da177e4SLinus Torvalds 2554e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 2555d4d77629STejun Heo 
journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); 255686098fa0SChristoph Hellwig if (IS_ERR(journal->j_dev_bd)) { 255786098fa0SChristoph Hellwig result = PTR_ERR(journal->j_dev_bd); 255886098fa0SChristoph Hellwig journal->j_dev_bd = NULL; 255986098fa0SChristoph Hellwig reiserfs_warning(super, 256086098fa0SChristoph Hellwig "journal_init_dev: Cannot open '%s': %i", 256186098fa0SChristoph Hellwig jdev_name, result); 256286098fa0SChristoph Hellwig return result; 256386098fa0SChristoph Hellwig } 256486098fa0SChristoph Hellwig 25651da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 2566bd4c625cSLinus Torvalds reiserfs_info(super, 2567bd4c625cSLinus Torvalds "journal_init_dev: journal device: %s\n", 256874f9f974SEdward Shishkin bdevname(journal->j_dev_bd, b)); 256986098fa0SChristoph Hellwig return 0; 25701da177e4SLinus Torvalds } 25711da177e4SLinus Torvalds 2572cf3d0b81SEdward Shishkin /** 2573cf3d0b81SEdward Shishkin * When creating/tuning a file system user can assign some 2574cf3d0b81SEdward Shishkin * journal params within boundaries which depend on the ratio 2575cf3d0b81SEdward Shishkin * blocksize/standard_blocksize. 2576cf3d0b81SEdward Shishkin * 2577cf3d0b81SEdward Shishkin * For blocks >= standard_blocksize transaction size should 2578cf3d0b81SEdward Shishkin * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more 2579cf3d0b81SEdward Shishkin * then JOURNAL_TRANS_MAX_DEFAULT. 2580cf3d0b81SEdward Shishkin * 2581cf3d0b81SEdward Shishkin * For blocks < standard_blocksize these boundaries should be 2582cf3d0b81SEdward Shishkin * decreased proportionally. 
2583cf3d0b81SEdward Shishkin */ 2584cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096) 2585cf3d0b81SEdward Shishkin 2586a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb, 2587cf3d0b81SEdward Shishkin struct reiserfs_journal *journal) 2588cf3d0b81SEdward Shishkin { 2589cf3d0b81SEdward Shishkin if (journal->j_trans_max) { 2590cf3d0b81SEdward Shishkin /* Non-default journal params. 2591cf3d0b81SEdward Shishkin Do sanity check for them. */ 2592cf3d0b81SEdward Shishkin int ratio = 1; 2593a9dd3643SJeff Mahoney if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2594a9dd3643SJeff Mahoney ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; 2595cf3d0b81SEdward Shishkin 2596cf3d0b81SEdward Shishkin if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || 2597cf3d0b81SEdward Shishkin journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || 2598a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < 2599cf3d0b81SEdward Shishkin JOURNAL_MIN_RATIO) { 2600a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-462", 260145b03d5eSJeff Mahoney "bad transaction max size (%u). " 260245b03d5eSJeff Mahoney "FSCK?", journal->j_trans_max); 2603cf3d0b81SEdward Shishkin return 1; 2604cf3d0b81SEdward Shishkin } 2605cf3d0b81SEdward Shishkin if (journal->j_max_batch != (journal->j_trans_max) * 2606cf3d0b81SEdward Shishkin JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { 2607a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-463", 260845b03d5eSJeff Mahoney "bad transaction max batch (%u). " 260945b03d5eSJeff Mahoney "FSCK?", journal->j_max_batch); 2610cf3d0b81SEdward Shishkin return 1; 2611cf3d0b81SEdward Shishkin } 2612cf3d0b81SEdward Shishkin } else { 2613cf3d0b81SEdward Shishkin /* Default journal params. 
2614cf3d0b81SEdward Shishkin The file system was created by old version 2615cf3d0b81SEdward Shishkin of mkreiserfs, so some fields contain zeros, 2616cf3d0b81SEdward Shishkin and we need to advise proper values for them */ 2617a9dd3643SJeff Mahoney if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { 2618a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", 2619a9dd3643SJeff Mahoney sb->s_blocksize); 262045b03d5eSJeff Mahoney return 1; 262145b03d5eSJeff Mahoney } 2622cf3d0b81SEdward Shishkin journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2623cf3d0b81SEdward Shishkin journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2624cf3d0b81SEdward Shishkin journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2625cf3d0b81SEdward Shishkin } 2626cf3d0b81SEdward Shishkin return 0; 2627cf3d0b81SEdward Shishkin } 2628cf3d0b81SEdward Shishkin 26291da177e4SLinus Torvalds /* 26301da177e4SLinus Torvalds ** must be called once on fs mount. calls journal_read for you 26311da177e4SLinus Torvalds */ 2632a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name, 2633bd4c625cSLinus Torvalds int old_format, unsigned int commit_max_age) 2634bd4c625cSLinus Torvalds { 2635a9dd3643SJeff Mahoney int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; 26361da177e4SLinus Torvalds struct buffer_head *bhjh; 26371da177e4SLinus Torvalds struct reiserfs_super_block *rs; 26381da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 26391da177e4SLinus Torvalds struct reiserfs_journal *journal; 26401da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 26411da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 264298ea3f50SFrederic Weisbecker int ret; 26431da177e4SLinus Torvalds 2644558feb08SJoe Perches journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); 26451da177e4SLinus Torvalds if (!journal) { 2646a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1256", 264745b03d5eSJeff Mahoney "unable to get memory for journal structure"); 26481da177e4SLinus 
Torvalds return 1; 26491da177e4SLinus Torvalds } 26501da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_bitmap_nodes); 26511da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_prealloc_list); 26521da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_working_list); 26531da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_journal_list); 26541da177e4SLinus Torvalds journal->j_persistent_trans = 0; 265537c69b98SFrederic Weisbecker if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, 265637c69b98SFrederic Weisbecker reiserfs_bmap_count(sb))) 26571da177e4SLinus Torvalds goto free_and_return; 265898ea3f50SFrederic Weisbecker 2659a9dd3643SJeff Mahoney allocate_bitmap_nodes(sb); 26601da177e4SLinus Torvalds 26611da177e4SLinus Torvalds /* reserved for journal area support */ 2662a9dd3643SJeff Mahoney SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? 2663bd4c625cSLinus Torvalds REISERFS_OLD_DISK_OFFSET_IN_BYTES 2664a9dd3643SJeff Mahoney / sb->s_blocksize + 2665a9dd3643SJeff Mahoney reiserfs_bmap_count(sb) + 2666bd4c625cSLinus Torvalds 1 : 2667bd4c625cSLinus Torvalds REISERFS_DISK_OFFSET_IN_BYTES / 2668a9dd3643SJeff Mahoney sb->s_blocksize + 2); 26691da177e4SLinus Torvalds 267025985edcSLucas De Marchi /* Sanity check to see is the standard journal fitting within first bitmap 26711da177e4SLinus Torvalds (actual for small blocksizes) */ 2672a9dd3643SJeff Mahoney if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2673a9dd3643SJeff Mahoney (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2674a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { 2675a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1393", 267645b03d5eSJeff Mahoney "journal does not fit for area addressed " 267745b03d5eSJeff Mahoney "by first of bitmap blocks. It starts at " 26781da177e4SLinus Torvalds "%u and its size is %u. 
Block size %ld", 2679a9dd3643SJeff Mahoney SB_JOURNAL_1st_RESERVED_BLOCK(sb), 2680a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb), 2681a9dd3643SJeff Mahoney sb->s_blocksize); 26821da177e4SLinus Torvalds goto free_and_return; 26831da177e4SLinus Torvalds } 26841da177e4SLinus Torvalds 2685a9dd3643SJeff Mahoney if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2686a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-462", 268745b03d5eSJeff Mahoney "unable to initialize jornal device"); 26881da177e4SLinus Torvalds goto free_and_return; 26891da177e4SLinus Torvalds } 26901da177e4SLinus Torvalds 2691a9dd3643SJeff Mahoney rs = SB_DISK_SUPER_BLOCK(sb); 26921da177e4SLinus Torvalds 26931da177e4SLinus Torvalds /* read journal header */ 2694a9dd3643SJeff Mahoney bhjh = journal_bread(sb, 2695a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2696a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)); 26971da177e4SLinus Torvalds if (!bhjh) { 2698a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-459", 269945b03d5eSJeff Mahoney "unable to read journal header"); 27001da177e4SLinus Torvalds goto free_and_return; 27011da177e4SLinus Torvalds } 27021da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(bhjh->b_data); 27031da177e4SLinus Torvalds 27041da177e4SLinus Torvalds /* make sure that journal matches to the super block */ 2705bd4c625cSLinus Torvalds if (is_reiserfs_jr(rs) 2706bd4c625cSLinus Torvalds && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2707bd4c625cSLinus Torvalds sb_jp_journal_magic(rs))) { 2708a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-460", 270945b03d5eSJeff Mahoney "journal header magic %x (device %s) does " 271045b03d5eSJeff Mahoney "not match to magic found in super block %x", 271145b03d5eSJeff Mahoney jh->jh_journal.jp_journal_magic, 27121da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 27131da177e4SLinus Torvalds sb_jp_journal_magic(rs)); 27141da177e4SLinus Torvalds brelse(bhjh); 27151da177e4SLinus Torvalds goto free_and_return; 
27161da177e4SLinus Torvalds } 27171da177e4SLinus Torvalds 27181da177e4SLinus Torvalds journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); 27191da177e4SLinus Torvalds journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); 2720bd4c625cSLinus Torvalds journal->j_max_commit_age = 2721bd4c625cSLinus Torvalds le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 27221da177e4SLinus Torvalds journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 27231da177e4SLinus Torvalds 2724a9dd3643SJeff Mahoney if (check_advise_trans_params(sb, journal) != 0) 2725cf3d0b81SEdward Shishkin goto free_and_return; 27261da177e4SLinus Torvalds journal->j_default_max_commit_age = journal->j_max_commit_age; 27271da177e4SLinus Torvalds 27281da177e4SLinus Torvalds if (commit_max_age != 0) { 27291da177e4SLinus Torvalds journal->j_max_commit_age = commit_max_age; 27301da177e4SLinus Torvalds journal->j_max_trans_age = commit_max_age; 27311da177e4SLinus Torvalds } 27321da177e4SLinus Torvalds 2733a9dd3643SJeff Mahoney reiserfs_info(sb, "journal params: device %s, size %u, " 27341da177e4SLinus Torvalds "journal first block %u, max trans len %u, max batch %u, " 27351da177e4SLinus Torvalds "max commit age %u, max trans age %u\n", 27361da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 2737a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb), 2738a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 27391da177e4SLinus Torvalds journal->j_trans_max, 27401da177e4SLinus Torvalds journal->j_max_batch, 2741bd4c625cSLinus Torvalds journal->j_max_commit_age, journal->j_max_trans_age); 27421da177e4SLinus Torvalds 27431da177e4SLinus Torvalds brelse(bhjh); 27441da177e4SLinus Torvalds 27451da177e4SLinus Torvalds journal->j_list_bitmap_index = 0; 2746a9dd3643SJeff Mahoney journal_list_init(sb); 27471da177e4SLinus Torvalds 2748bd4c625cSLinus Torvalds memset(journal->j_list_hash_table, 0, 2749bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 
27501da177e4SLinus Torvalds 27511da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_dirty_buffers); 27521da177e4SLinus Torvalds spin_lock_init(&journal->j_dirty_buffers_lock); 27531da177e4SLinus Torvalds 27541da177e4SLinus Torvalds journal->j_start = 0; 27551da177e4SLinus Torvalds journal->j_len = 0; 27561da177e4SLinus Torvalds journal->j_len_alloc = 0; 27571da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 27581da177e4SLinus Torvalds atomic_set(&(journal->j_async_throttle), 0); 27591da177e4SLinus Torvalds journal->j_bcount = 0; 27601da177e4SLinus Torvalds journal->j_trans_start_time = 0; 27611da177e4SLinus Torvalds journal->j_last = NULL; 27621da177e4SLinus Torvalds journal->j_first = NULL; 27631da177e4SLinus Torvalds init_waitqueue_head(&(journal->j_join_wait)); 2764f68215c4SJeff Mahoney mutex_init(&journal->j_mutex); 2765afe70259SJeff Mahoney mutex_init(&journal->j_flush_mutex); 27661da177e4SLinus Torvalds 27671da177e4SLinus Torvalds journal->j_trans_id = 10; 27681da177e4SLinus Torvalds journal->j_mount_id = 10; 27691da177e4SLinus Torvalds journal->j_state = 0; 27701da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 27711da177e4SLinus Torvalds journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 27721da177e4SLinus Torvalds journal->j_cnode_free_orig = journal->j_cnode_free_list; 27731da177e4SLinus Torvalds journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 27741da177e4SLinus Torvalds journal->j_cnode_used = 0; 27751da177e4SLinus Torvalds journal->j_must_wait = 0; 27761da177e4SLinus Torvalds 2777576f6d79SJeff Mahoney if (journal->j_cnode_free == 0) { 2778a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2004", "Journal cnode memory " 2779576f6d79SJeff Mahoney "allocation failed (%ld bytes). Journal is " 2780576f6d79SJeff Mahoney "too large for available memory. 
Usually " 2781576f6d79SJeff Mahoney "this is due to a journal that is too large.", 2782576f6d79SJeff Mahoney sizeof (struct reiserfs_journal_cnode) * num_cnodes); 2783576f6d79SJeff Mahoney goto free_and_return; 2784576f6d79SJeff Mahoney } 2785576f6d79SJeff Mahoney 2786a9dd3643SJeff Mahoney init_journal_hash(sb); 27871da177e4SLinus Torvalds jl = journal->j_current_jl; 278837c69b98SFrederic Weisbecker 278937c69b98SFrederic Weisbecker /* 279037c69b98SFrederic Weisbecker * get_list_bitmap() may call flush_commit_list() which 279137c69b98SFrederic Weisbecker * requires the lock. Calling flush_commit_list() shouldn't happen 279237c69b98SFrederic Weisbecker * this early but I like to be paranoid. 279337c69b98SFrederic Weisbecker */ 279437c69b98SFrederic Weisbecker reiserfs_write_lock(sb); 2795a9dd3643SJeff Mahoney jl->j_list_bitmap = get_list_bitmap(sb, jl); 279637c69b98SFrederic Weisbecker reiserfs_write_unlock(sb); 27971da177e4SLinus Torvalds if (!jl->j_list_bitmap) { 2798a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2005", 279945b03d5eSJeff Mahoney "get_list_bitmap failed for journal list 0"); 28001da177e4SLinus Torvalds goto free_and_return; 28011da177e4SLinus Torvalds } 280237c69b98SFrederic Weisbecker 280337c69b98SFrederic Weisbecker ret = journal_read(sb); 280437c69b98SFrederic Weisbecker if (ret < 0) { 2805a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2006", 280645b03d5eSJeff Mahoney "Replay Failure, unable to mount"); 28071da177e4SLinus Torvalds goto free_and_return; 28081da177e4SLinus Torvalds } 28091da177e4SLinus Torvalds 28101da177e4SLinus Torvalds reiserfs_mounted_fs_count++; 281137c69b98SFrederic Weisbecker if (reiserfs_mounted_fs_count <= 1) 281228aadf51STejun Heo commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); 28131da177e4SLinus Torvalds 2814c4028958SDavid Howells INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2815a9dd3643SJeff Mahoney journal->j_work_sb = sb; 28161da177e4SLinus Torvalds return 0; 28171da177e4SLinus 
Torvalds free_and_return: 2818a9dd3643SJeff Mahoney free_journal_ram(sb); 28191da177e4SLinus Torvalds return 1; 28201da177e4SLinus Torvalds } 28211da177e4SLinus Torvalds 28221da177e4SLinus Torvalds /* 28231da177e4SLinus Torvalds ** test for a polite end of the current transaction. Used by file_write, and should 28241da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single 28251da177e4SLinus Torvalds ** transaction 28261da177e4SLinus Torvalds */ 2827bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2828bd4c625cSLinus Torvalds int new_alloc) 2829bd4c625cSLinus Torvalds { 28301da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 28311da177e4SLinus Torvalds time_t now = get_seconds(); 28321da177e4SLinus Torvalds /* cannot restart while nested */ 28331da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 28341da177e4SLinus Torvalds if (th->t_refcount > 1) 28351da177e4SLinus Torvalds return 0; 28361da177e4SLinus Torvalds if (journal->j_must_wait > 0 || 28371da177e4SLinus Torvalds (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 28381da177e4SLinus Torvalds atomic_read(&(journal->j_jlock)) || 28391da177e4SLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age || 28401da177e4SLinus Torvalds journal->j_cnode_free < (journal->j_trans_max * 3)) { 28411da177e4SLinus Torvalds return 1; 28421da177e4SLinus Torvalds } 2843b18c1c6eSDavidlohr Bueso 28446ae1ea44SChris Mason journal->j_len_alloc += new_alloc; 28456ae1ea44SChris Mason th->t_blocks_allocated += new_alloc ; 28461da177e4SLinus Torvalds return 0; 28471da177e4SLinus Torvalds } 28481da177e4SLinus Torvalds 2849b18c1c6eSDavidlohr Bueso /* this must be called inside a transaction 28501da177e4SLinus Torvalds */ 2851bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2852bd4c625cSLinus Torvalds { 28531da177e4SLinus Torvalds struct 
reiserfs_journal *journal = SB_JOURNAL(th->t_super); 28541da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 28551da177e4SLinus Torvalds journal->j_must_wait = 1; 28561da177e4SLinus Torvalds set_bit(J_WRITERS_BLOCKED, &journal->j_state); 28571da177e4SLinus Torvalds return; 28581da177e4SLinus Torvalds } 28591da177e4SLinus Torvalds 2860b18c1c6eSDavidlohr Bueso /* this must be called without a transaction started 28611da177e4SLinus Torvalds */ 2862bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s) 2863bd4c625cSLinus Torvalds { 28641da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 28651da177e4SLinus Torvalds clear_bit(J_WRITERS_BLOCKED, &journal->j_state); 28661da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 28671da177e4SLinus Torvalds } 28681da177e4SLinus Torvalds 2869b18c1c6eSDavidlohr Bueso /* this must be called without a transaction started 28701da177e4SLinus Torvalds */ 2871bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s) 2872bd4c625cSLinus Torvalds { 28731da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 28741da177e4SLinus Torvalds wait_event(journal->j_join_wait, 28751da177e4SLinus Torvalds !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); 28761da177e4SLinus Torvalds } 28771da177e4SLinus Torvalds 2878bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s) 2879bd4c625cSLinus Torvalds { 28801da177e4SLinus Torvalds wait_queue_t wait; 28811da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 28821da177e4SLinus Torvalds set_bit(J_WRITERS_QUEUED, &journal->j_state); 28831da177e4SLinus Torvalds 28841da177e4SLinus Torvalds /* 28851da177e4SLinus Torvalds * we don't want to use wait_event here because 28861da177e4SLinus Torvalds * we only want to wait once. 
28871da177e4SLinus Torvalds */ 28881da177e4SLinus Torvalds init_waitqueue_entry(&wait, current); 28891da177e4SLinus Torvalds add_wait_queue(&journal->j_join_wait, &wait); 28901da177e4SLinus Torvalds set_current_state(TASK_UNINTERRUPTIBLE); 28918ebc4232SFrederic Weisbecker if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { 2892278f6679SJeff Mahoney int depth = reiserfs_write_unlock_nested(s); 28931da177e4SLinus Torvalds schedule(); 2894278f6679SJeff Mahoney reiserfs_write_lock_nested(s, depth); 28958ebc4232SFrederic Weisbecker } 28965ab2f7e0SMilind Arun Choudhary __set_current_state(TASK_RUNNING); 28971da177e4SLinus Torvalds remove_wait_queue(&journal->j_join_wait, &wait); 28981da177e4SLinus Torvalds } 28991da177e4SLinus Torvalds 2900bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s) 2901bd4c625cSLinus Torvalds { 29021da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 29031da177e4SLinus Torvalds if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) 29041da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 29051da177e4SLinus Torvalds } 29061da177e4SLinus Torvalds 2907600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) 29081da177e4SLinus Torvalds { 29091da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 29101da177e4SLinus Torvalds unsigned long bcount = journal->j_bcount; 29111da177e4SLinus Torvalds while (1) { 2912278f6679SJeff Mahoney int depth; 2913278f6679SJeff Mahoney 2914278f6679SJeff Mahoney depth = reiserfs_write_unlock_nested(sb); 2915041e0e3bSNishanth Aravamudan schedule_timeout_uninterruptible(1); 2916278f6679SJeff Mahoney reiserfs_write_lock_nested(sb, depth); 2917278f6679SJeff Mahoney 29181da177e4SLinus Torvalds journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 29191da177e4SLinus Torvalds while ((atomic_read(&journal->j_wcount) > 0 || 29201da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) && 29211da177e4SLinus 
Torvalds journal->j_trans_id == trans_id) { 29221da177e4SLinus Torvalds queue_log_writer(sb); 29231da177e4SLinus Torvalds } 29241da177e4SLinus Torvalds if (journal->j_trans_id != trans_id) 29251da177e4SLinus Torvalds break; 29261da177e4SLinus Torvalds if (bcount == journal->j_bcount) 29271da177e4SLinus Torvalds break; 29281da177e4SLinus Torvalds bcount = journal->j_bcount; 29291da177e4SLinus Torvalds } 29301da177e4SLinus Torvalds } 29311da177e4SLinus Torvalds 29321da177e4SLinus Torvalds /* join == true if you must join an existing transaction. 29331da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish 29341da177e4SLinus Torvalds ** 29351da177e4SLinus Torvalds ** this will block until the transaction is joinable. send the number of blocks you 29361da177e4SLinus Torvalds ** expect to use in nblocks. 29371da177e4SLinus Torvalds */ 2938bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 2939a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 2940bd4c625cSLinus Torvalds int join) 2941bd4c625cSLinus Torvalds { 29421da177e4SLinus Torvalds time_t now = get_seconds(); 2943600ed416SJeff Mahoney unsigned int old_trans_id; 2944a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 29451da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 29461da177e4SLinus Torvalds int sched_count = 0; 29471da177e4SLinus Torvalds int retval; 2948278f6679SJeff Mahoney int depth; 29491da177e4SLinus Torvalds 2950a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal_begin"); 295114a61442SEric Sesterhenn BUG_ON(nblocks > journal->j_trans_max); 29521da177e4SLinus Torvalds 2953a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_being); 29541da177e4SLinus Torvalds /* set here for journal_join */ 29551da177e4SLinus Torvalds th->t_refcount = 1; 2956a9dd3643SJeff Mahoney th->t_super = sb; 29571da177e4SLinus Torvalds 29581da177e4SLinus Torvalds relock: 2959a9dd3643SJeff 
Mahoney lock_journal(sb); 29601da177e4SLinus Torvalds if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 2961a9dd3643SJeff Mahoney unlock_journal(sb); 29621da177e4SLinus Torvalds retval = journal->j_errno; 29631da177e4SLinus Torvalds goto out_fail; 29641da177e4SLinus Torvalds } 29651da177e4SLinus Torvalds journal->j_bcount++; 29661da177e4SLinus Torvalds 29671da177e4SLinus Torvalds if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 2968a9dd3643SJeff Mahoney unlock_journal(sb); 2969278f6679SJeff Mahoney depth = reiserfs_write_unlock_nested(sb); 2970a9dd3643SJeff Mahoney reiserfs_wait_on_write_block(sb); 2971278f6679SJeff Mahoney reiserfs_write_lock_nested(sb, depth); 2972a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_relock_writers); 29731da177e4SLinus Torvalds goto relock; 29741da177e4SLinus Torvalds } 29751da177e4SLinus Torvalds now = get_seconds(); 29761da177e4SLinus Torvalds 29771da177e4SLinus Torvalds /* if there is no room in the journal OR 29781da177e4SLinus Torvalds ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 29791da177e4SLinus Torvalds ** we don't sleep if there aren't other writers 29801da177e4SLinus Torvalds */ 29811da177e4SLinus Torvalds 29821da177e4SLinus Torvalds if ((!join && journal->j_must_wait > 0) || 2983bd4c625cSLinus Torvalds (!join 2984bd4c625cSLinus Torvalds && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) 2985bd4c625cSLinus Torvalds || (!join && atomic_read(&journal->j_wcount) > 0 2986bd4c625cSLinus Torvalds && journal->j_trans_start_time > 0 2987bd4c625cSLinus Torvalds && (now - journal->j_trans_start_time) > 2988bd4c625cSLinus Torvalds journal->j_max_trans_age) || (!join 2989bd4c625cSLinus Torvalds && atomic_read(&journal->j_jlock)) 2990bd4c625cSLinus Torvalds || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 29911da177e4SLinus Torvalds 29921da177e4SLinus Torvalds old_trans_id = journal->j_trans_id; 2993a9dd3643SJeff 
Mahoney unlock_journal(sb); /* allow others to finish this transaction */ 29941da177e4SLinus Torvalds 29951da177e4SLinus Torvalds if (!join && (journal->j_len_alloc + nblocks + 2) >= 29961da177e4SLinus Torvalds journal->j_max_batch && 2997bd4c625cSLinus Torvalds ((journal->j_len + nblocks + 2) * 100) < 2998bd4c625cSLinus Torvalds (journal->j_len_alloc * 75)) { 29991da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) > 10) { 30001da177e4SLinus Torvalds sched_count++; 3001a9dd3643SJeff Mahoney queue_log_writer(sb); 30021da177e4SLinus Torvalds goto relock; 30031da177e4SLinus Torvalds } 30041da177e4SLinus Torvalds } 30051da177e4SLinus Torvalds /* don't mess with joining the transaction if all we have to do is 30061da177e4SLinus Torvalds * wait for someone else to do a commit 30071da177e4SLinus Torvalds */ 30081da177e4SLinus Torvalds if (atomic_read(&journal->j_jlock)) { 30091da177e4SLinus Torvalds while (journal->j_trans_id == old_trans_id && 30101da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) { 3011a9dd3643SJeff Mahoney queue_log_writer(sb); 30121da177e4SLinus Torvalds } 30131da177e4SLinus Torvalds goto relock; 30141da177e4SLinus Torvalds } 3015a9dd3643SJeff Mahoney retval = journal_join(&myth, sb, 1); 30161da177e4SLinus Torvalds if (retval) 30171da177e4SLinus Torvalds goto out_fail; 30181da177e4SLinus Torvalds 30191da177e4SLinus Torvalds /* someone might have ended the transaction while we joined */ 30201da177e4SLinus Torvalds if (old_trans_id != journal->j_trans_id) { 3021a9dd3643SJeff Mahoney retval = do_journal_end(&myth, sb, 1, 0); 30221da177e4SLinus Torvalds } else { 3023a9dd3643SJeff Mahoney retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); 30241da177e4SLinus Torvalds } 30251da177e4SLinus Torvalds 30261da177e4SLinus Torvalds if (retval) 30271da177e4SLinus Torvalds goto out_fail; 30281da177e4SLinus Torvalds 3029a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_relock_wcount); 30301da177e4SLinus Torvalds goto relock; 30311da177e4SLinus 
Torvalds } 30321da177e4SLinus Torvalds /* we are the first writer, set trans_id */ 30331da177e4SLinus Torvalds if (journal->j_trans_start_time == 0) { 30341da177e4SLinus Torvalds journal->j_trans_start_time = get_seconds(); 30351da177e4SLinus Torvalds } 30361da177e4SLinus Torvalds atomic_inc(&(journal->j_wcount)); 30371da177e4SLinus Torvalds journal->j_len_alloc += nblocks; 30381da177e4SLinus Torvalds th->t_blocks_logged = 0; 30391da177e4SLinus Torvalds th->t_blocks_allocated = nblocks; 30401da177e4SLinus Torvalds th->t_trans_id = journal->j_trans_id; 3041a9dd3643SJeff Mahoney unlock_journal(sb); 30421da177e4SLinus Torvalds INIT_LIST_HEAD(&th->t_list); 30431da177e4SLinus Torvalds return 0; 30441da177e4SLinus Torvalds 30451da177e4SLinus Torvalds out_fail: 30461da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 30471da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 30481da177e4SLinus Torvalds * persistent transactions there are. We need to do this so if this 30491da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 3050a9dd3643SJeff Mahoney th->t_super = sb; 30511da177e4SLinus Torvalds return retval; 30521da177e4SLinus Torvalds } 30531da177e4SLinus Torvalds 3054bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct 3055bd4c625cSLinus Torvalds super_block 3056bd4c625cSLinus Torvalds *s, 3057bd4c625cSLinus Torvalds int nblocks) 3058bd4c625cSLinus Torvalds { 30591da177e4SLinus Torvalds int ret; 30601da177e4SLinus Torvalds struct reiserfs_transaction_handle *th; 30611da177e4SLinus Torvalds 30621da177e4SLinus Torvalds /* if we're nesting into an existing transaction. 
It will be 30631da177e4SLinus Torvalds ** persistent on its own 30641da177e4SLinus Torvalds */ 30651da177e4SLinus Torvalds if (reiserfs_transaction_running(s)) { 30661da177e4SLinus Torvalds th = current->journal_info; 30671da177e4SLinus Torvalds th->t_refcount++; 306814a61442SEric Sesterhenn BUG_ON(th->t_refcount < 2); 306914a61442SEric Sesterhenn 30701da177e4SLinus Torvalds return th; 30711da177e4SLinus Torvalds } 3072d739b42bSPekka Enberg th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 30731da177e4SLinus Torvalds if (!th) 30741da177e4SLinus Torvalds return NULL; 30751da177e4SLinus Torvalds ret = journal_begin(th, s, nblocks); 30761da177e4SLinus Torvalds if (ret) { 3077d739b42bSPekka Enberg kfree(th); 30781da177e4SLinus Torvalds return NULL; 30791da177e4SLinus Torvalds } 30801da177e4SLinus Torvalds 30811da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans++; 30821da177e4SLinus Torvalds return th; 30831da177e4SLinus Torvalds } 30841da177e4SLinus Torvalds 3085bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) 3086bd4c625cSLinus Torvalds { 30871da177e4SLinus Torvalds struct super_block *s = th->t_super; 30881da177e4SLinus Torvalds int ret = 0; 30891da177e4SLinus Torvalds if (th->t_trans_id) 30901da177e4SLinus Torvalds ret = journal_end(th, th->t_super, th->t_blocks_allocated); 30911da177e4SLinus Torvalds else 30921da177e4SLinus Torvalds ret = -EIO; 30931da177e4SLinus Torvalds if (th->t_refcount == 0) { 30941da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans--; 3095d739b42bSPekka Enberg kfree(th); 30961da177e4SLinus Torvalds } 30971da177e4SLinus Torvalds return ret; 30981da177e4SLinus Torvalds } 30991da177e4SLinus Torvalds 3100bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 3101a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3102bd4c625cSLinus Torvalds { 31031da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = 
current->journal_info; 31041da177e4SLinus Torvalds 31051da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 31061da177e4SLinus Torvalds ** pointer 31071da177e4SLinus Torvalds */ 31081da177e4SLinus Torvalds th->t_handle_save = cur_th; 310914a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 3110a9dd3643SJeff Mahoney return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); 31111da177e4SLinus Torvalds } 31121da177e4SLinus Torvalds 3113bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th, 3114a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3115bd4c625cSLinus Torvalds { 31161da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 31171da177e4SLinus Torvalds 31181da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 31191da177e4SLinus Torvalds ** pointer 31201da177e4SLinus Torvalds */ 31211da177e4SLinus Torvalds th->t_handle_save = cur_th; 312214a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 3123a9dd3643SJeff Mahoney return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); 31241da177e4SLinus Torvalds } 31251da177e4SLinus Torvalds 3126bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th, 3127a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3128bd4c625cSLinus Torvalds { 31291da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 31301da177e4SLinus Torvalds int ret; 31311da177e4SLinus Torvalds 31321da177e4SLinus Torvalds th->t_handle_save = NULL; 31331da177e4SLinus Torvalds if (cur_th) { 31341da177e4SLinus Torvalds /* we are nesting into the current transaction */ 3135a9dd3643SJeff Mahoney if (cur_th->t_super == sb) { 31361da177e4SLinus Torvalds BUG_ON(!cur_th->t_refcount); 31371da177e4SLinus Torvalds cur_th->t_refcount++; 31381da177e4SLinus Torvalds memcpy(th, cur_th, sizeof(*th)); 
31391da177e4SLinus Torvalds if (th->t_refcount <= 1) 3140a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2005", 314145b03d5eSJeff Mahoney "BAD: refcount <= 1, but " 314245b03d5eSJeff Mahoney "journal_info != 0"); 31431da177e4SLinus Torvalds return 0; 31441da177e4SLinus Torvalds } else { 31451da177e4SLinus Torvalds /* we've ended up with a handle from a different filesystem. 31461da177e4SLinus Torvalds ** save it and restore on journal_end. This should never 31471da177e4SLinus Torvalds ** really happen... 31481da177e4SLinus Torvalds */ 3149a9dd3643SJeff Mahoney reiserfs_warning(sb, "clm-2100", 315045b03d5eSJeff Mahoney "nesting info a different FS"); 31511da177e4SLinus Torvalds th->t_handle_save = current->journal_info; 31521da177e4SLinus Torvalds current->journal_info = th; 31531da177e4SLinus Torvalds } 31541da177e4SLinus Torvalds } else { 31551da177e4SLinus Torvalds current->journal_info = th; 31561da177e4SLinus Torvalds } 3157a9dd3643SJeff Mahoney ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); 315814a61442SEric Sesterhenn BUG_ON(current->journal_info != th); 31591da177e4SLinus Torvalds 31601da177e4SLinus Torvalds /* I guess this boils down to being the reciprocal of clm-2100 above. 31611da177e4SLinus Torvalds * If do_journal_begin_r fails, we need to put it back, since journal_end 31621da177e4SLinus Torvalds * won't be called to do it. */ 31631da177e4SLinus Torvalds if (ret) 31641da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 31651da177e4SLinus Torvalds else 31661da177e4SLinus Torvalds BUG_ON(!th->t_refcount); 31671da177e4SLinus Torvalds 31681da177e4SLinus Torvalds return ret; 31691da177e4SLinus Torvalds } 31701da177e4SLinus Torvalds 31711da177e4SLinus Torvalds /* 31721da177e4SLinus Torvalds ** puts bh into the current transaction. If it was already there, reorders removes the 31731da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 
31741da177e4SLinus Torvalds ** 31751da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 31761da177e4SLinus Torvalds ** transaction is committed. 31771da177e4SLinus Torvalds ** 31781da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 31791da177e4SLinus Torvalds */ 3180bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3181a9dd3643SJeff Mahoney struct super_block *sb, struct buffer_head *bh) 3182bd4c625cSLinus Torvalds { 3183a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 31841da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 31851da177e4SLinus Torvalds int count_already_incd = 0; 31861da177e4SLinus Torvalds int prepared = 0; 31871da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 31881da177e4SLinus Torvalds 3189a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty); 31901da177e4SLinus Torvalds if (th->t_trans_id != journal->j_trans_id) { 3191c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1577", 3192c3a9c210SJeff Mahoney "handle trans id %ld != current trans id %ld", 31931da177e4SLinus Torvalds th->t_trans_id, journal->j_trans_id); 31941da177e4SLinus Torvalds } 31951da177e4SLinus Torvalds 31961da177e4SLinus Torvalds prepared = test_clear_buffer_journal_prepared(bh); 31971da177e4SLinus Torvalds clear_buffer_journal_restore_dirty(bh); 31981da177e4SLinus Torvalds /* already in this transaction, we are done */ 31991da177e4SLinus Torvalds if (buffer_journaled(bh)) { 3200a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty_already); 32011da177e4SLinus Torvalds return 0; 32021da177e4SLinus Torvalds } 32031da177e4SLinus Torvalds 32041da177e4SLinus Torvalds /* this must be turned into a panic instead of a warning. 
We can't allow 32051da177e4SLinus Torvalds ** a dirty or journal_dirty or locked buffer to be logged, as some changes 32061da177e4SLinus Torvalds ** could get to disk too early. NOT GOOD. 32071da177e4SLinus Torvalds */ 32081da177e4SLinus Torvalds if (!prepared || buffer_dirty(bh)) { 3209a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1777", 321045b03d5eSJeff Mahoney "buffer %llu bad state " 32111da177e4SLinus Torvalds "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3212bd4c625cSLinus Torvalds (unsigned long long)bh->b_blocknr, 3213bd4c625cSLinus Torvalds prepared ? ' ' : '!', 32141da177e4SLinus Torvalds buffer_locked(bh) ? ' ' : '!', 32151da177e4SLinus Torvalds buffer_dirty(bh) ? ' ' : '!', 32161da177e4SLinus Torvalds buffer_journal_dirty(bh) ? ' ' : '!'); 32171da177e4SLinus Torvalds } 32181da177e4SLinus Torvalds 32191da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) <= 0) { 3220a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1409", 322145b03d5eSJeff Mahoney "returning because j_wcount was %d", 3222bd4c625cSLinus Torvalds atomic_read(&(journal->j_wcount))); 32231da177e4SLinus Torvalds return 1; 32241da177e4SLinus Torvalds } 32251da177e4SLinus Torvalds /* this error means I've screwed up, and we've overflowed the transaction. 32261da177e4SLinus Torvalds ** Nothing can be done here, except make the FS readonly or panic. 
32271da177e4SLinus Torvalds */ 32281da177e4SLinus Torvalds if (journal->j_len >= journal->j_trans_max) { 3229c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1413", 3230c3a9c210SJeff Mahoney "j_len (%lu) is too big", 3231bd4c625cSLinus Torvalds journal->j_len); 32321da177e4SLinus Torvalds } 32331da177e4SLinus Torvalds 32341da177e4SLinus Torvalds if (buffer_journal_dirty(bh)) { 32351da177e4SLinus Torvalds count_already_incd = 1; 3236a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty_notjournal); 32371da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 32381da177e4SLinus Torvalds } 32391da177e4SLinus Torvalds 32401da177e4SLinus Torvalds if (journal->j_len > journal->j_len_alloc) { 32411da177e4SLinus Torvalds journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; 32421da177e4SLinus Torvalds } 32431da177e4SLinus Torvalds 32441da177e4SLinus Torvalds set_buffer_journaled(bh); 32451da177e4SLinus Torvalds 32461da177e4SLinus Torvalds /* now put this guy on the end */ 32471da177e4SLinus Torvalds if (!cn) { 3248a9dd3643SJeff Mahoney cn = get_cnode(sb); 32491da177e4SLinus Torvalds if (!cn) { 3250a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-4", "get_cnode failed!"); 32511da177e4SLinus Torvalds } 32521da177e4SLinus Torvalds 32531da177e4SLinus Torvalds if (th->t_blocks_logged == th->t_blocks_allocated) { 32541da177e4SLinus Torvalds th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; 32551da177e4SLinus Torvalds journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; 32561da177e4SLinus Torvalds } 32571da177e4SLinus Torvalds th->t_blocks_logged++; 32581da177e4SLinus Torvalds journal->j_len++; 32591da177e4SLinus Torvalds 32601da177e4SLinus Torvalds cn->bh = bh; 32611da177e4SLinus Torvalds cn->blocknr = bh->b_blocknr; 3262a9dd3643SJeff Mahoney cn->sb = sb; 32631da177e4SLinus Torvalds cn->jlist = NULL; 32641da177e4SLinus Torvalds insert_journal_hash(journal->j_hash_table, cn); 32651da177e4SLinus Torvalds if (!count_already_incd) { 32661da177e4SLinus Torvalds 
get_bh(bh); 32671da177e4SLinus Torvalds } 32681da177e4SLinus Torvalds } 32691da177e4SLinus Torvalds cn->next = NULL; 32701da177e4SLinus Torvalds cn->prev = journal->j_last; 32711da177e4SLinus Torvalds cn->bh = bh; 32721da177e4SLinus Torvalds if (journal->j_last) { 32731da177e4SLinus Torvalds journal->j_last->next = cn; 32741da177e4SLinus Torvalds journal->j_last = cn; 32751da177e4SLinus Torvalds } else { 32761da177e4SLinus Torvalds journal->j_first = cn; 32771da177e4SLinus Torvalds journal->j_last = cn; 32781da177e4SLinus Torvalds } 3279033369d1SArtem Bityutskiy reiserfs_schedule_old_flush(sb); 32801da177e4SLinus Torvalds return 0; 32811da177e4SLinus Torvalds } 32821da177e4SLinus Torvalds 3283bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th, 3284a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3285bd4c625cSLinus Torvalds { 32861da177e4SLinus Torvalds if (!current->journal_info && th->t_refcount > 1) 3287a9dd3643SJeff Mahoney reiserfs_warning(sb, "REISER-NESTING", 328845b03d5eSJeff Mahoney "th NULL, refcount %d", th->t_refcount); 32891da177e4SLinus Torvalds 32901da177e4SLinus Torvalds if (!th->t_trans_id) { 32911da177e4SLinus Torvalds WARN_ON(1); 32921da177e4SLinus Torvalds return -EIO; 32931da177e4SLinus Torvalds } 32941da177e4SLinus Torvalds 32951da177e4SLinus Torvalds th->t_refcount--; 32961da177e4SLinus Torvalds if (th->t_refcount > 0) { 3297bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *cur_th = 3298bd4c625cSLinus Torvalds current->journal_info; 32991da177e4SLinus Torvalds 33001da177e4SLinus Torvalds /* we aren't allowed to close a nested transaction on a different 33011da177e4SLinus Torvalds ** filesystem from the one in the task struct 33021da177e4SLinus Torvalds */ 330314a61442SEric Sesterhenn BUG_ON(cur_th->t_super != th->t_super); 33041da177e4SLinus Torvalds 33051da177e4SLinus Torvalds if (th != cur_th) { 33061da177e4SLinus Torvalds memcpy(current->journal_info, th, sizeof(*th)); 
33071da177e4SLinus Torvalds th->t_trans_id = 0; 33081da177e4SLinus Torvalds } 33091da177e4SLinus Torvalds return 0; 33101da177e4SLinus Torvalds } else { 3311a9dd3643SJeff Mahoney return do_journal_end(th, sb, nblocks, 0); 33121da177e4SLinus Torvalds } 33131da177e4SLinus Torvalds } 33141da177e4SLinus Torvalds 33151da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters. 33161da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list 33171da177e4SLinus Torvalds ** 33181da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted 33191da177e4SLinus Torvalds ** 33201da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise 33211da177e4SLinus Torvalds */ 3322a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb, 3323bd4c625cSLinus Torvalds b_blocknr_t blocknr, int already_cleaned) 3324bd4c625cSLinus Torvalds { 33251da177e4SLinus Torvalds struct buffer_head *bh; 33261da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 3327a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 33281da177e4SLinus Torvalds int ret = 0; 33291da177e4SLinus Torvalds 3330a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); 33311da177e4SLinus Torvalds if (!cn || !cn->bh) { 33321da177e4SLinus Torvalds return ret; 33331da177e4SLinus Torvalds } 33341da177e4SLinus Torvalds bh = cn->bh; 33351da177e4SLinus Torvalds if (cn->prev) { 33361da177e4SLinus Torvalds cn->prev->next = cn->next; 33371da177e4SLinus Torvalds } 33381da177e4SLinus Torvalds if (cn->next) { 33391da177e4SLinus Torvalds cn->next->prev = cn->prev; 33401da177e4SLinus Torvalds } 33411da177e4SLinus Torvalds if (cn == journal->j_first) { 33421da177e4SLinus Torvalds journal->j_first = cn->next; 33431da177e4SLinus Torvalds } 33441da177e4SLinus Torvalds if (cn == journal->j_last) { 33451da177e4SLinus Torvalds journal->j_last = cn->prev; 
33461da177e4SLinus Torvalds } 33471da177e4SLinus Torvalds if (bh) 3348a9dd3643SJeff Mahoney remove_journal_hash(sb, journal->j_hash_table, NULL, 3349bd4c625cSLinus Torvalds bh->b_blocknr, 0); 33501da177e4SLinus Torvalds clear_buffer_journaled(bh); /* don't log this one */ 33511da177e4SLinus Torvalds 33521da177e4SLinus Torvalds if (!already_cleaned) { 33531da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 33541da177e4SLinus Torvalds clear_buffer_dirty(bh); 33551da177e4SLinus Torvalds clear_buffer_journal_test(bh); 33561da177e4SLinus Torvalds put_bh(bh); 33571da177e4SLinus Torvalds if (atomic_read(&(bh->b_count)) < 0) { 3358a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1752", 335945b03d5eSJeff Mahoney "b_count < 0"); 33601da177e4SLinus Torvalds } 33611da177e4SLinus Torvalds ret = 1; 33621da177e4SLinus Torvalds } 33631da177e4SLinus Torvalds journal->j_len--; 33641da177e4SLinus Torvalds journal->j_len_alloc--; 3365a9dd3643SJeff Mahoney free_cnode(sb, cn); 33661da177e4SLinus Torvalds return ret; 33671da177e4SLinus Torvalds } 33681da177e4SLinus Torvalds 33691da177e4SLinus Torvalds /* 33701da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the 33710779bf2dSMatt LaPlante ** transactions that include it are committed to disk. 
33721da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty, 33731da177e4SLinus Torvalds ** and 0 if you aren't 33741da177e4SLinus Torvalds ** 33751da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 33761da177e4SLinus Torvalds ** blocks for a given transaction on disk 33771da177e4SLinus Torvalds ** 33781da177e4SLinus Torvalds */ 3379bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn) 3380bd4c625cSLinus Torvalds { 33811da177e4SLinus Torvalds struct super_block *sb = cn->sb; 33821da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 33831da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur = cn->hprev; 33841da177e4SLinus Torvalds int can_dirty = 1; 33851da177e4SLinus Torvalds 33861da177e4SLinus Torvalds /* first test hprev. These are all newer than cn, so any node here 33871da177e4SLinus Torvalds ** with the same block number and dev means this node can't be sent 33881da177e4SLinus Torvalds ** to disk right now. 33891da177e4SLinus Torvalds */ 33901da177e4SLinus Torvalds while (cur && can_dirty) { 33911da177e4SLinus Torvalds if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 33921da177e4SLinus Torvalds cur->blocknr == blocknr) { 33931da177e4SLinus Torvalds can_dirty = 0; 33941da177e4SLinus Torvalds } 33951da177e4SLinus Torvalds cur = cur->hprev; 33961da177e4SLinus Torvalds } 33971da177e4SLinus Torvalds /* then test hnext. These are all older than cn. 
As long as they 33981da177e4SLinus Torvalds ** are committed to the log, it is safe to write cn to disk 33991da177e4SLinus Torvalds */ 34001da177e4SLinus Torvalds cur = cn->hnext; 34011da177e4SLinus Torvalds while (cur && can_dirty) { 34021da177e4SLinus Torvalds if (cur->jlist && cur->jlist->j_len > 0 && 34031da177e4SLinus Torvalds atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 34041da177e4SLinus Torvalds cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 34051da177e4SLinus Torvalds can_dirty = 0; 34061da177e4SLinus Torvalds } 34071da177e4SLinus Torvalds cur = cur->hnext; 34081da177e4SLinus Torvalds } 34091da177e4SLinus Torvalds return can_dirty; 34101da177e4SLinus Torvalds } 34111da177e4SLinus Torvalds 34121da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk 34130779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning 34141da177e4SLinus Torvalds */ 3415bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th, 3416a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3417bd4c625cSLinus Torvalds { 3418a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 34191da177e4SLinus Torvalds 34201da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 34211da177e4SLinus Torvalds /* you can sync while nested, very, very bad */ 342214a61442SEric Sesterhenn BUG_ON(th->t_refcount > 1); 34231da177e4SLinus Torvalds if (journal->j_len == 0) { 3424a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3425bd4c625cSLinus Torvalds 1); 3426a9dd3643SJeff Mahoney journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 34271da177e4SLinus Torvalds } 3428a9dd3643SJeff Mahoney return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); 34291da177e4SLinus Torvalds } 34301da177e4SLinus Torvalds 34311da177e4SLinus Torvalds /* 34321da177e4SLinus Torvalds ** writeback the pending async commits to disk 34331da177e4SLinus 
Torvalds */ 3434c4028958SDavid Howells static void flush_async_commits(struct work_struct *work) 3435bd4c625cSLinus Torvalds { 3436c4028958SDavid Howells struct reiserfs_journal *journal = 3437c4028958SDavid Howells container_of(work, struct reiserfs_journal, j_work.work); 3438a9dd3643SJeff Mahoney struct super_block *sb = journal->j_work_sb; 34391da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 34401da177e4SLinus Torvalds struct list_head *entry; 34411da177e4SLinus Torvalds 34428ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 34431da177e4SLinus Torvalds if (!list_empty(&journal->j_journal_list)) { 34441da177e4SLinus Torvalds /* last entry is the youngest, commit it and you get everything */ 34451da177e4SLinus Torvalds entry = journal->j_journal_list.prev; 34461da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 3447a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 34481da177e4SLinus Torvalds } 34498ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 34501da177e4SLinus Torvalds } 34511da177e4SLinus Torvalds 34521da177e4SLinus Torvalds /* 34531da177e4SLinus Torvalds ** flushes any old transactions to disk 34541da177e4SLinus Torvalds ** ends the current transaction if it is too old 34551da177e4SLinus Torvalds */ 345625729b0eSArtem Bityutskiy void reiserfs_flush_old_commits(struct super_block *sb) 3457bd4c625cSLinus Torvalds { 34581da177e4SLinus Torvalds time_t now; 34591da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 3460a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 34611da177e4SLinus Torvalds 34621da177e4SLinus Torvalds now = get_seconds(); 34631da177e4SLinus Torvalds /* safety check so we don't flush while we are replaying the log during 34641da177e4SLinus Torvalds * mount 34651da177e4SLinus Torvalds */ 346625729b0eSArtem Bityutskiy if (list_empty(&journal->j_journal_list)) 346725729b0eSArtem Bityutskiy return; 34681da177e4SLinus Torvalds 34691da177e4SLinus Torvalds /* check the current transaction. 
If there are no writers, and it is 34701da177e4SLinus Torvalds * too old, finish it, and force the commit blocks to disk 34711da177e4SLinus Torvalds */ 34721da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) <= 0 && 34731da177e4SLinus Torvalds journal->j_trans_start_time > 0 && 34741da177e4SLinus Torvalds journal->j_len > 0 && 3475bd4c625cSLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3476a9dd3643SJeff Mahoney if (!journal_join(&th, sb, 1)) { 3477a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 3478a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 3479bd4c625cSLinus Torvalds 1); 3480a9dd3643SJeff Mahoney journal_mark_dirty(&th, sb, 3481a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 34821da177e4SLinus Torvalds 34831da177e4SLinus Torvalds /* we're only being called from kreiserfsd, it makes no sense to do 34841da177e4SLinus Torvalds ** an async commit so that kreiserfsd can do it later 34851da177e4SLinus Torvalds */ 3486a9dd3643SJeff Mahoney do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); 34871da177e4SLinus Torvalds } 34881da177e4SLinus Torvalds } 34891da177e4SLinus Torvalds } 34901da177e4SLinus Torvalds 34911da177e4SLinus Torvalds /* 34921da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 34931da177e4SLinus Torvalds ** 34941da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 34951da177e4SLinus Torvalds ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just 34961da177e4SLinus Torvalds ** flushes the commit list and returns 0. 34971da177e4SLinus Torvalds ** 34981da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 
34991da177e4SLinus Torvalds ** 35001da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log. 35011da177e4SLinus Torvalds */ 3502bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th, 3503a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 3504bd4c625cSLinus Torvalds int flags) 3505bd4c625cSLinus Torvalds { 35061da177e4SLinus Torvalds 35071da177e4SLinus Torvalds time_t now; 35081da177e4SLinus Torvalds int flush = flags & FLUSH_ALL; 35091da177e4SLinus Torvalds int commit_now = flags & COMMIT_NOW; 35101da177e4SLinus Torvalds int wait_on_commit = flags & WAIT; 35111da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 3512a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 35131da177e4SLinus Torvalds 35141da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 35151da177e4SLinus Torvalds 35161da177e4SLinus Torvalds if (th->t_trans_id != journal->j_trans_id) { 3517c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1577", 3518c3a9c210SJeff Mahoney "handle trans id %ld != current trans id %ld", 35191da177e4SLinus Torvalds th->t_trans_id, journal->j_trans_id); 35201da177e4SLinus Torvalds } 35211da177e4SLinus Torvalds 35221da177e4SLinus Torvalds journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); 35231da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. 
unmounting might not call begin */ 35241da177e4SLinus Torvalds atomic_dec(&(journal->j_wcount)); 35251da177e4SLinus Torvalds } 35261da177e4SLinus Torvalds 35271da177e4SLinus Torvalds /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 35281da177e4SLinus Torvalds ** will be dealt with by next transaction that actually writes something, but should be taken 35291da177e4SLinus Torvalds ** care of in this trans 35301da177e4SLinus Torvalds */ 353114a61442SEric Sesterhenn BUG_ON(journal->j_len == 0); 353214a61442SEric Sesterhenn 35331da177e4SLinus Torvalds /* if wcount > 0, and we are called to with flush or commit_now, 35341da177e4SLinus Torvalds ** we wait on j_join_wait. We will wake up when the last writer has 35351da177e4SLinus Torvalds ** finished the transaction, and started it on its way to the disk. 35361da177e4SLinus Torvalds ** Then, we flush the commit or journal list, and just return 0 35371da177e4SLinus Torvalds ** because the rest of journal end was already done for this transaction. 
35381da177e4SLinus Torvalds */ 35391da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) > 0) { 35401da177e4SLinus Torvalds if (flush || commit_now) { 35411da177e4SLinus Torvalds unsigned trans_id; 35421da177e4SLinus Torvalds 35431da177e4SLinus Torvalds jl = journal->j_current_jl; 35441da177e4SLinus Torvalds trans_id = jl->j_trans_id; 35451da177e4SLinus Torvalds if (wait_on_commit) 35461da177e4SLinus Torvalds jl->j_state |= LIST_COMMIT_PENDING; 35471da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 1); 35481da177e4SLinus Torvalds if (flush) { 35491da177e4SLinus Torvalds journal->j_next_full_flush = 1; 35501da177e4SLinus Torvalds } 3551a9dd3643SJeff Mahoney unlock_journal(sb); 35521da177e4SLinus Torvalds 35531da177e4SLinus Torvalds /* sleep while the current transaction is still j_jlocked */ 35541da177e4SLinus Torvalds while (journal->j_trans_id == trans_id) { 35551da177e4SLinus Torvalds if (atomic_read(&journal->j_jlock)) { 3556a9dd3643SJeff Mahoney queue_log_writer(sb); 35571da177e4SLinus Torvalds } else { 3558a9dd3643SJeff Mahoney lock_journal(sb); 35591da177e4SLinus Torvalds if (journal->j_trans_id == trans_id) { 3560bd4c625cSLinus Torvalds atomic_set(&(journal->j_jlock), 3561bd4c625cSLinus Torvalds 1); 35621da177e4SLinus Torvalds } 3563a9dd3643SJeff Mahoney unlock_journal(sb); 35641da177e4SLinus Torvalds } 35651da177e4SLinus Torvalds } 356614a61442SEric Sesterhenn BUG_ON(journal->j_trans_id == trans_id); 356714a61442SEric Sesterhenn 3568bd4c625cSLinus Torvalds if (commit_now 3569a9dd3643SJeff Mahoney && journal_list_still_alive(sb, trans_id) 3570bd4c625cSLinus Torvalds && wait_on_commit) { 3571a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 35721da177e4SLinus Torvalds } 35731da177e4SLinus Torvalds return 0; 35741da177e4SLinus Torvalds } 3575a9dd3643SJeff Mahoney unlock_journal(sb); 35761da177e4SLinus Torvalds return 0; 35771da177e4SLinus Torvalds } 35781da177e4SLinus Torvalds 35791da177e4SLinus Torvalds /* deal with old transactions where we 
are the last writers */ 35801da177e4SLinus Torvalds now = get_seconds(); 35811da177e4SLinus Torvalds if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { 35821da177e4SLinus Torvalds commit_now = 1; 35831da177e4SLinus Torvalds journal->j_next_async_flush = 1; 35841da177e4SLinus Torvalds } 35851da177e4SLinus Torvalds /* don't batch when someone is waiting on j_join_wait */ 35861da177e4SLinus Torvalds /* don't batch when syncing the commit or flushing the whole trans */ 3587bd4c625cSLinus Torvalds if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) 3588bd4c625cSLinus Torvalds && !flush && !commit_now && (journal->j_len < journal->j_max_batch) 3589bd4c625cSLinus Torvalds && journal->j_len_alloc < journal->j_max_batch 3590bd4c625cSLinus Torvalds && journal->j_cnode_free > (journal->j_trans_max * 3)) { 35911da177e4SLinus Torvalds journal->j_bcount++; 3592a9dd3643SJeff Mahoney unlock_journal(sb); 35931da177e4SLinus Torvalds return 0; 35941da177e4SLinus Torvalds } 35951da177e4SLinus Torvalds 3596a9dd3643SJeff Mahoney if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) { 3597a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-003", 3598c3a9c210SJeff Mahoney "j_start (%ld) is too high", 3599bd4c625cSLinus Torvalds journal->j_start); 36001da177e4SLinus Torvalds } 36011da177e4SLinus Torvalds return 1; 36021da177e4SLinus Torvalds } 36031da177e4SLinus Torvalds 36041da177e4SLinus Torvalds /* 36051da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe. 36061da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. 36071da177e4SLinus Torvalds ** 36081da177e4SLinus Torvalds ** otherwise: 36091da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 36101da177e4SLinus Torvalds ** before this transaction has finished. 
36111da177e4SLinus Torvalds ** 36121da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 36131da177e4SLinus Torvalds ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 36141da177e4SLinus Torvalds ** the block can't be reallocated yet. 36151da177e4SLinus Torvalds ** 36161da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 36171da177e4SLinus Torvalds */ 3618bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th, 3619a9dd3643SJeff Mahoney struct super_block *sb, b_blocknr_t blocknr) 3620bd4c625cSLinus Torvalds { 3621a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 36221da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 36231da177e4SLinus Torvalds struct buffer_head *bh = NULL; 36241da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 36251da177e4SLinus Torvalds int cleaned = 0; 36261da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 36271da177e4SLinus Torvalds 3628a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); 36291da177e4SLinus Torvalds if (cn && cn->bh) { 36301da177e4SLinus Torvalds bh = cn->bh; 36311da177e4SLinus Torvalds get_bh(bh); 36321da177e4SLinus Torvalds } 36331da177e4SLinus Torvalds /* if it is journal new, we just remove it from this transaction */ 36341da177e4SLinus Torvalds if (bh && buffer_journal_new(bh)) { 36351da177e4SLinus Torvalds clear_buffer_journal_new(bh); 36361da177e4SLinus Torvalds clear_prepared_bits(bh); 36371da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 3638a9dd3643SJeff Mahoney cleaned = remove_from_transaction(sb, blocknr, cleaned); 36391da177e4SLinus Torvalds } else { 36401da177e4SLinus Torvalds /* set the bit for this block in the journal bitmap for this transaction */ 
36411da177e4SLinus Torvalds jb = journal->j_current_jl->j_list_bitmap; 36421da177e4SLinus Torvalds if (!jb) { 3643a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1702", 3644c3a9c210SJeff Mahoney "journal_list_bitmap is NULL"); 36451da177e4SLinus Torvalds } 3646a9dd3643SJeff Mahoney set_bit_in_list_bitmap(sb, blocknr, jb); 36471da177e4SLinus Torvalds 36481da177e4SLinus Torvalds /* Note, the entire while loop is not allowed to schedule. */ 36491da177e4SLinus Torvalds 36501da177e4SLinus Torvalds if (bh) { 36511da177e4SLinus Torvalds clear_prepared_bits(bh); 36521da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 36531da177e4SLinus Torvalds } 3654a9dd3643SJeff Mahoney cleaned = remove_from_transaction(sb, blocknr, cleaned); 36551da177e4SLinus Torvalds 36561da177e4SLinus Torvalds /* find all older transactions with this block, make sure they don't try to write it out */ 3657a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_list_hash_table, 3658bd4c625cSLinus Torvalds blocknr); 36591da177e4SLinus Torvalds while (cn) { 3660a9dd3643SJeff Mahoney if (sb == cn->sb && blocknr == cn->blocknr) { 36611da177e4SLinus Torvalds set_bit(BLOCK_FREED, &cn->state); 36621da177e4SLinus Torvalds if (cn->bh) { 36631da177e4SLinus Torvalds if (!cleaned) { 36641da177e4SLinus Torvalds /* remove_from_transaction will brelse the buffer if it was 36651da177e4SLinus Torvalds ** in the current trans 36661da177e4SLinus Torvalds */ 3667bd4c625cSLinus Torvalds clear_buffer_journal_dirty(cn-> 3668bd4c625cSLinus Torvalds bh); 36691da177e4SLinus Torvalds clear_buffer_dirty(cn->bh); 3670bd4c625cSLinus Torvalds clear_buffer_journal_test(cn-> 3671bd4c625cSLinus Torvalds bh); 36721da177e4SLinus Torvalds cleaned = 1; 36731da177e4SLinus Torvalds put_bh(cn->bh); 3674bd4c625cSLinus Torvalds if (atomic_read 3675bd4c625cSLinus Torvalds (&(cn->bh->b_count)) < 0) { 3676a9dd3643SJeff Mahoney reiserfs_warning(sb, 367745b03d5eSJeff Mahoney "journal-2138", 367845b03d5eSJeff Mahoney "cn->bh->b_count 
< 0"); 36791da177e4SLinus Torvalds } 36801da177e4SLinus Torvalds } 36811da177e4SLinus Torvalds if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3682bd4c625cSLinus Torvalds atomic_dec(& 3683bd4c625cSLinus Torvalds (cn->jlist-> 3684bd4c625cSLinus Torvalds j_nonzerolen)); 36851da177e4SLinus Torvalds } 36861da177e4SLinus Torvalds cn->bh = NULL; 36871da177e4SLinus Torvalds } 36881da177e4SLinus Torvalds } 36891da177e4SLinus Torvalds cn = cn->hnext; 36901da177e4SLinus Torvalds } 36911da177e4SLinus Torvalds } 36921da177e4SLinus Torvalds 3693398c95bdSChris Mason if (bh) 3694398c95bdSChris Mason release_buffer_page(bh); /* get_hash grabs the buffer */ 36951da177e4SLinus Torvalds return 0; 36961da177e4SLinus Torvalds } 36971da177e4SLinus Torvalds 3698bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode) 3699bd4c625cSLinus Torvalds { 37001da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); 37011da177e4SLinus Torvalds REISERFS_I(inode)->i_jl = journal->j_current_jl; 37021da177e4SLinus Torvalds REISERFS_I(inode)->i_trans_id = journal->j_trans_id; 37031da177e4SLinus Torvalds } 37041da177e4SLinus Torvalds 37051da177e4SLinus Torvalds /* 37061da177e4SLinus Torvalds * returns -1 on error, 0 if no commits/barriers were done and 1 37071da177e4SLinus Torvalds * if a transaction was actually committed and the barrier was done 37081da177e4SLinus Torvalds */ 37091da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id, 37101da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 37111da177e4SLinus Torvalds { 37121da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 37131da177e4SLinus Torvalds struct super_block *sb = inode->i_sb; 37141da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 37151da177e4SLinus Torvalds int ret = 0; 37161da177e4SLinus Torvalds 37171da177e4SLinus Torvalds /* is it from the current transaction, or from an unknown 
transaction? */ 37181da177e4SLinus Torvalds if (id == journal->j_trans_id) { 37191da177e4SLinus Torvalds jl = journal->j_current_jl; 37201da177e4SLinus Torvalds /* try to let other writers come in and grow this transaction */ 37211da177e4SLinus Torvalds let_transaction_grow(sb, id); 37221da177e4SLinus Torvalds if (journal->j_trans_id != id) { 37231da177e4SLinus Torvalds goto flush_commit_only; 37241da177e4SLinus Torvalds } 37251da177e4SLinus Torvalds 37261da177e4SLinus Torvalds ret = journal_begin(&th, sb, 1); 37271da177e4SLinus Torvalds if (ret) 37281da177e4SLinus Torvalds return ret; 37291da177e4SLinus Torvalds 37301da177e4SLinus Torvalds /* someone might have ended this transaction while we joined */ 37311da177e4SLinus Torvalds if (journal->j_trans_id != id) { 3732bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3733bd4c625cSLinus Torvalds 1); 37341da177e4SLinus Torvalds journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); 37351da177e4SLinus Torvalds ret = journal_end(&th, sb, 1); 37361da177e4SLinus Torvalds goto flush_commit_only; 37371da177e4SLinus Torvalds } 37381da177e4SLinus Torvalds 37391da177e4SLinus Torvalds ret = journal_end_sync(&th, sb, 1); 37401da177e4SLinus Torvalds if (!ret) 37411da177e4SLinus Torvalds ret = 1; 37421da177e4SLinus Torvalds 37431da177e4SLinus Torvalds } else { 37441da177e4SLinus Torvalds /* this gets tricky, we have to make sure the journal list in 37451da177e4SLinus Torvalds * the inode still exists. We know the list is still around 37461da177e4SLinus Torvalds * if we've got a larger transaction id than the oldest list 37471da177e4SLinus Torvalds */ 37481da177e4SLinus Torvalds flush_commit_only: 37491da177e4SLinus Torvalds if (journal_list_still_alive(inode->i_sb, id)) { 37501da177e4SLinus Torvalds /* 37511da177e4SLinus Torvalds * we only set ret to 1 when we know for sure 37521da177e4SLinus Torvalds * the barrier hasn't been started yet on the commit 37531da177e4SLinus Torvalds * block. 
37541da177e4SLinus Torvalds */ 37551da177e4SLinus Torvalds if (atomic_read(&jl->j_commit_left) > 1) 37561da177e4SLinus Torvalds ret = 1; 37571da177e4SLinus Torvalds flush_commit_list(sb, jl, 1); 37581da177e4SLinus Torvalds if (journal->j_errno) 37591da177e4SLinus Torvalds ret = journal->j_errno; 37601da177e4SLinus Torvalds } 37611da177e4SLinus Torvalds } 37621da177e4SLinus Torvalds /* otherwise the list is gone, and long since committed */ 37631da177e4SLinus Torvalds return ret; 37641da177e4SLinus Torvalds } 37651da177e4SLinus Torvalds 3766bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode) 3767bd4c625cSLinus Torvalds { 3768600ed416SJeff Mahoney unsigned int id = REISERFS_I(inode)->i_trans_id; 37691da177e4SLinus Torvalds struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 37701da177e4SLinus Torvalds 37711da177e4SLinus Torvalds /* for the whole inode, assume unset id means it was 37721da177e4SLinus Torvalds * changed in the current transaction. More conservative 37731da177e4SLinus Torvalds */ 37741da177e4SLinus Torvalds if (!id || !jl) { 37751da177e4SLinus Torvalds reiserfs_update_inode_transaction(inode); 37761da177e4SLinus Torvalds id = REISERFS_I(inode)->i_trans_id; 37771da177e4SLinus Torvalds /* jl will be updated in __commit_trans_jl */ 37781da177e4SLinus Torvalds } 37791da177e4SLinus Torvalds 37801da177e4SLinus Torvalds return __commit_trans_jl(inode, id, jl); 37811da177e4SLinus Torvalds } 37821da177e4SLinus Torvalds 3783a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb, 3784bd4c625cSLinus Torvalds struct buffer_head *bh) 3785bd4c625cSLinus Torvalds { 3786a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3787a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.restore_prepared); 37881da177e4SLinus Torvalds if (!bh) { 37891da177e4SLinus Torvalds return; 37901da177e4SLinus Torvalds } 37911da177e4SLinus Torvalds if (test_clear_buffer_journal_restore_dirty(bh) && 37921da177e4SLinus 
Torvalds buffer_journal_dirty(bh)) { 37931da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 3794278f6679SJeff Mahoney reiserfs_write_lock(sb); 3795a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, 37961da177e4SLinus Torvalds journal->j_list_hash_table, 37971da177e4SLinus Torvalds bh->b_blocknr); 37981da177e4SLinus Torvalds if (cn && can_dirty(cn)) { 37991da177e4SLinus Torvalds set_buffer_journal_test(bh); 38001da177e4SLinus Torvalds mark_buffer_dirty(bh); 38011da177e4SLinus Torvalds } 3802278f6679SJeff Mahoney reiserfs_write_unlock(sb); 38031da177e4SLinus Torvalds } 38041da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 38051da177e4SLinus Torvalds } 38061da177e4SLinus Torvalds 38071da177e4SLinus Torvalds extern struct tree_balance *cur_tb; 38081da177e4SLinus Torvalds /* 38091da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't 38101da177e4SLinus Torvalds ** be written to disk while we are altering it. So, we must: 38111da177e4SLinus Torvalds ** clean it 38121da177e4SLinus Torvalds ** wait on it. 
38131da177e4SLinus Torvalds ** 38141da177e4SLinus Torvalds */ 3815a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb, 3816bd4c625cSLinus Torvalds struct buffer_head *bh, int wait) 3817bd4c625cSLinus Torvalds { 3818a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.prepare); 38191da177e4SLinus Torvalds 3820ca5de404SNick Piggin if (!trylock_buffer(bh)) { 38211da177e4SLinus Torvalds if (!wait) 38221da177e4SLinus Torvalds return 0; 38231da177e4SLinus Torvalds lock_buffer(bh); 38241da177e4SLinus Torvalds } 38251da177e4SLinus Torvalds set_buffer_journal_prepared(bh); 38261da177e4SLinus Torvalds if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 38271da177e4SLinus Torvalds clear_buffer_journal_test(bh); 38281da177e4SLinus Torvalds set_buffer_journal_restore_dirty(bh); 38291da177e4SLinus Torvalds } 38301da177e4SLinus Torvalds unlock_buffer(bh); 38311da177e4SLinus Torvalds return 1; 38321da177e4SLinus Torvalds } 38331da177e4SLinus Torvalds 38341da177e4SLinus Torvalds /* 38351da177e4SLinus Torvalds ** long and ugly. If flush, will not return until all commit 38361da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk. 38371da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk. 38381da177e4SLinus Torvalds ** 38391da177e4SLinus Torvalds ** keep reading, there are comments as you go along 38401da177e4SLinus Torvalds ** 38411da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing 38421da177e4SLinus Torvalds ** journal lists, etc just won't happen. 
38431da177e4SLinus Torvalds */ 3844bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th, 3845a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 3846bd4c625cSLinus Torvalds int flags) 3847bd4c625cSLinus Torvalds { 3848a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 38491da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *next, *jl_cn; 38501da177e4SLinus Torvalds struct reiserfs_journal_cnode *last_cn = NULL; 38511da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 38521da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 38531da177e4SLinus Torvalds struct buffer_head *c_bh; /* commit bh */ 38541da177e4SLinus Torvalds struct buffer_head *d_bh; /* desc bh */ 38551da177e4SLinus Torvalds int cur_write_start = 0; /* start index of current log write */ 38561da177e4SLinus Torvalds int old_start; 38571da177e4SLinus Torvalds int i; 3858a44c94a7SAlexander Zarochentsev int flush; 3859a44c94a7SAlexander Zarochentsev int wait_on_commit; 38601da177e4SLinus Torvalds struct reiserfs_journal_list *jl, *temp_jl; 38611da177e4SLinus Torvalds struct list_head *entry, *safe; 38621da177e4SLinus Torvalds unsigned long jindex; 3863600ed416SJeff Mahoney unsigned int commit_trans_id; 38641da177e4SLinus Torvalds int trans_half; 3865278f6679SJeff Mahoney int depth; 38661da177e4SLinus Torvalds 38671da177e4SLinus Torvalds BUG_ON(th->t_refcount > 1); 38681da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 38691da177e4SLinus Torvalds 3870a44c94a7SAlexander Zarochentsev /* protect flush_older_commits from doing mistakes if the 3871a44c94a7SAlexander Zarochentsev transaction ID counter gets overflowed. 
*/ 3872600ed416SJeff Mahoney if (th->t_trans_id == ~0U) 3873a44c94a7SAlexander Zarochentsev flags |= FLUSH_ALL | COMMIT_NOW | WAIT; 3874a44c94a7SAlexander Zarochentsev flush = flags & FLUSH_ALL; 3875a44c94a7SAlexander Zarochentsev wait_on_commit = flags & WAIT; 3876a44c94a7SAlexander Zarochentsev 38771da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 3878a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal end"); 38791da177e4SLinus Torvalds if (journal->j_len == 0) { 3880a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3881bd4c625cSLinus Torvalds 1); 3882a9dd3643SJeff Mahoney journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 38831da177e4SLinus Torvalds } 38841da177e4SLinus Torvalds 3885a9dd3643SJeff Mahoney lock_journal(sb); 38861da177e4SLinus Torvalds if (journal->j_next_full_flush) { 38871da177e4SLinus Torvalds flags |= FLUSH_ALL; 38881da177e4SLinus Torvalds flush = 1; 38891da177e4SLinus Torvalds } 38901da177e4SLinus Torvalds if (journal->j_next_async_flush) { 38911da177e4SLinus Torvalds flags |= COMMIT_NOW | WAIT; 38921da177e4SLinus Torvalds wait_on_commit = 1; 38931da177e4SLinus Torvalds } 38941da177e4SLinus Torvalds 38951da177e4SLinus Torvalds /* check_journal_end locks the journal, and unlocks if it does not return 1 38961da177e4SLinus Torvalds ** it tells us if we should continue with the journal_end, or just return 38971da177e4SLinus Torvalds */ 3898a9dd3643SJeff Mahoney if (!check_journal_end(th, sb, nblocks, flags)) { 3899033369d1SArtem Bityutskiy reiserfs_schedule_old_flush(sb); 3900a9dd3643SJeff Mahoney wake_queued_writers(sb); 3901a9dd3643SJeff Mahoney reiserfs_async_progress_wait(sb); 39021da177e4SLinus Torvalds goto out; 39031da177e4SLinus Torvalds } 39041da177e4SLinus Torvalds 39051da177e4SLinus Torvalds /* check_journal_end might set these, check again */ 39061da177e4SLinus Torvalds if (journal->j_next_full_flush) { 39071da177e4SLinus Torvalds flush = 1; 39081da177e4SLinus Torvalds } 
39091da177e4SLinus Torvalds 39101da177e4SLinus Torvalds /* 39111da177e4SLinus Torvalds ** j must wait means we have to flush the log blocks, and the real blocks for 39121da177e4SLinus Torvalds ** this transaction 39131da177e4SLinus Torvalds */ 39141da177e4SLinus Torvalds if (journal->j_must_wait > 0) { 39151da177e4SLinus Torvalds flush = 1; 39161da177e4SLinus Torvalds } 39171da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE 3918ef43bc4fSJan Kara /* quota ops might need to nest, setup the journal_info pointer for them 3919ef43bc4fSJan Kara * and raise the refcount so that it is > 0. */ 39201da177e4SLinus Torvalds current->journal_info = th; 3921ef43bc4fSJan Kara th->t_refcount++; 39221da177e4SLinus Torvalds reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 39231da177e4SLinus Torvalds * the transaction */ 3924ef43bc4fSJan Kara th->t_refcount--; 39251da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 39261da177e4SLinus Torvalds #endif 39271da177e4SLinus Torvalds 39281da177e4SLinus Torvalds /* setup description block */ 3929bd4c625cSLinus Torvalds d_bh = 3930a9dd3643SJeff Mahoney journal_getblk(sb, 3931a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 3932bd4c625cSLinus Torvalds journal->j_start); 39331da177e4SLinus Torvalds set_buffer_uptodate(d_bh); 39341da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; 39351da177e4SLinus Torvalds memset(d_bh->b_data, 0, d_bh->b_size); 39361da177e4SLinus Torvalds memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 39371da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 39381da177e4SLinus Torvalds 39391da177e4SLinus Torvalds /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ 3940a9dd3643SJeff Mahoney c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 3941bd4c625cSLinus Torvalds ((journal->j_start + journal->j_len + 3942a9dd3643SJeff Mahoney 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 39431da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 39441da177e4SLinus Torvalds memset(c_bh->b_data, 0, c_bh->b_size); 39451da177e4SLinus Torvalds set_commit_trans_id(commit, journal->j_trans_id); 39461da177e4SLinus Torvalds set_buffer_uptodate(c_bh); 39471da177e4SLinus Torvalds 39481da177e4SLinus Torvalds /* init this journal list */ 39491da177e4SLinus Torvalds jl = journal->j_current_jl; 39501da177e4SLinus Torvalds 39511da177e4SLinus Torvalds /* we lock the commit before doing anything because 39521da177e4SLinus Torvalds * we want to make sure nobody tries to run flush_commit_list until 39531da177e4SLinus Torvalds * the new transaction is fully setup, and we've already flushed the 39541da177e4SLinus Torvalds * ordered bh list 39551da177e4SLinus Torvalds */ 39568ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); 39571da177e4SLinus Torvalds 39581da177e4SLinus Torvalds /* save the transaction id in case we need to commit it later */ 39591da177e4SLinus Torvalds commit_trans_id = jl->j_trans_id; 39601da177e4SLinus Torvalds 39611da177e4SLinus Torvalds atomic_set(&jl->j_older_commits_done, 0); 39621da177e4SLinus Torvalds jl->j_trans_id = journal->j_trans_id; 39631da177e4SLinus Torvalds jl->j_timestamp = journal->j_trans_start_time; 39641da177e4SLinus Torvalds jl->j_commit_bh = c_bh; 39651da177e4SLinus Torvalds jl->j_start = journal->j_start; 39661da177e4SLinus Torvalds jl->j_len = journal->j_len; 39671da177e4SLinus Torvalds atomic_set(&jl->j_nonzerolen, journal->j_len); 39681da177e4SLinus Torvalds atomic_set(&jl->j_commit_left, journal->j_len + 2); 39691da177e4SLinus Torvalds jl->j_realblock = NULL; 39701da177e4SLinus 
Torvalds 39711da177e4SLinus Torvalds /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 39721da177e4SLinus Torvalds ** for each real block, add it to the journal list hash, 39731da177e4SLinus Torvalds ** copy into real block index array in the commit or desc block 39741da177e4SLinus Torvalds */ 3975a9dd3643SJeff Mahoney trans_half = journal_trans_half(sb->s_blocksize); 39761da177e4SLinus Torvalds for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 39771da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 3978a9dd3643SJeff Mahoney jl_cn = get_cnode(sb); 39791da177e4SLinus Torvalds if (!jl_cn) { 3980a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1676", 3981c3a9c210SJeff Mahoney "get_cnode returned NULL"); 39821da177e4SLinus Torvalds } 39831da177e4SLinus Torvalds if (i == 0) { 39841da177e4SLinus Torvalds jl->j_realblock = jl_cn; 39851da177e4SLinus Torvalds } 39861da177e4SLinus Torvalds jl_cn->prev = last_cn; 39871da177e4SLinus Torvalds jl_cn->next = NULL; 39881da177e4SLinus Torvalds if (last_cn) { 39891da177e4SLinus Torvalds last_cn->next = jl_cn; 39901da177e4SLinus Torvalds } 39911da177e4SLinus Torvalds last_cn = jl_cn; 39921da177e4SLinus Torvalds /* make sure the block we are trying to log is not a block 39931da177e4SLinus Torvalds of journal or reserved area */ 39941da177e4SLinus Torvalds 3995bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 3996a9dd3643SJeff Mahoney (sb, cn->bh->b_blocknr)) { 3997a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-2332", 3998c3a9c210SJeff Mahoney "Trying to log block %lu, " 3999c3a9c210SJeff Mahoney "which is a log block", 4000bd4c625cSLinus Torvalds cn->bh->b_blocknr); 40011da177e4SLinus Torvalds } 40021da177e4SLinus Torvalds jl_cn->blocknr = cn->bh->b_blocknr; 40031da177e4SLinus Torvalds jl_cn->state = 0; 4004a9dd3643SJeff Mahoney jl_cn->sb = sb; 40051da177e4SLinus Torvalds jl_cn->bh = cn->bh; 40061da177e4SLinus Torvalds jl_cn->jlist = jl; 40071da177e4SLinus Torvalds 
insert_journal_hash(journal->j_list_hash_table, jl_cn); 40081da177e4SLinus Torvalds if (i < trans_half) { 4009bd4c625cSLinus Torvalds desc->j_realblock[i] = 4010bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 40111da177e4SLinus Torvalds } else { 4012bd4c625cSLinus Torvalds commit->j_realblock[i - trans_half] = 4013bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 40141da177e4SLinus Torvalds } 40151da177e4SLinus Torvalds } else { 40161da177e4SLinus Torvalds i--; 40171da177e4SLinus Torvalds } 40181da177e4SLinus Torvalds } 40191da177e4SLinus Torvalds set_desc_trans_len(desc, journal->j_len); 40201da177e4SLinus Torvalds set_desc_mount_id(desc, journal->j_mount_id); 40211da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 40221da177e4SLinus Torvalds set_commit_trans_len(commit, journal->j_len); 40231da177e4SLinus Torvalds 40241da177e4SLinus Torvalds /* special check in case all buffers in the journal were marked for not logging */ 402514a61442SEric Sesterhenn BUG_ON(journal->j_len == 0); 40261da177e4SLinus Torvalds 40271da177e4SLinus Torvalds /* we're about to dirty all the log blocks, mark the description block 40281da177e4SLinus Torvalds * dirty now too. Don't mark the commit block dirty until all the 40291da177e4SLinus Torvalds * others are on disk 40301da177e4SLinus Torvalds */ 40311da177e4SLinus Torvalds mark_buffer_dirty(d_bh); 40321da177e4SLinus Torvalds 40331da177e4SLinus Torvalds /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 40341da177e4SLinus Torvalds cur_write_start = journal->j_start; 40351da177e4SLinus Torvalds cn = journal->j_first; 40361da177e4SLinus Torvalds jindex = 1; /* start at one so we don't get the desc again */ 40371da177e4SLinus Torvalds while (cn) { 40381da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 40391da177e4SLinus Torvalds /* copy all the real blocks into log area. 
dirty log blocks */ 40401da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 40411da177e4SLinus Torvalds struct buffer_head *tmp_bh; 40421da177e4SLinus Torvalds char *addr; 40431da177e4SLinus Torvalds struct page *page; 4044bd4c625cSLinus Torvalds tmp_bh = 4045a9dd3643SJeff Mahoney journal_getblk(sb, 4046a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4047bd4c625cSLinus Torvalds ((cur_write_start + 4048bd4c625cSLinus Torvalds jindex) % 4049a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))); 40501da177e4SLinus Torvalds set_buffer_uptodate(tmp_bh); 40511da177e4SLinus Torvalds page = cn->bh->b_page; 40521da177e4SLinus Torvalds addr = kmap(page); 4053bd4c625cSLinus Torvalds memcpy(tmp_bh->b_data, 4054bd4c625cSLinus Torvalds addr + offset_in_page(cn->bh->b_data), 40551da177e4SLinus Torvalds cn->bh->b_size); 40561da177e4SLinus Torvalds kunmap(page); 40571da177e4SLinus Torvalds mark_buffer_dirty(tmp_bh); 40581da177e4SLinus Torvalds jindex++; 40591da177e4SLinus Torvalds set_buffer_journal_dirty(cn->bh); 40601da177e4SLinus Torvalds clear_buffer_journaled(cn->bh); 40611da177e4SLinus Torvalds } else { 40621da177e4SLinus Torvalds /* JDirty cleared sometime during transaction. don't log this one */ 4063a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2048", 406445b03d5eSJeff Mahoney "BAD, buffer in journal hash, " 406545b03d5eSJeff Mahoney "but not JDirty!"); 40661da177e4SLinus Torvalds brelse(cn->bh); 40671da177e4SLinus Torvalds } 40681da177e4SLinus Torvalds next = cn->next; 4069a9dd3643SJeff Mahoney free_cnode(sb, cn); 40701da177e4SLinus Torvalds cn = next; 4071278f6679SJeff Mahoney reiserfs_cond_resched(sb); 40721da177e4SLinus Torvalds } 40731da177e4SLinus Torvalds 40741da177e4SLinus Torvalds /* we are done with both the c_bh and d_bh, but 40751da177e4SLinus Torvalds ** c_bh must be written after all other commit blocks, 40761da177e4SLinus Torvalds ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
40771da177e4SLinus Torvalds */ 40781da177e4SLinus Torvalds 4079a9dd3643SJeff Mahoney journal->j_current_jl = alloc_journal_list(sb); 40801da177e4SLinus Torvalds 40811da177e4SLinus Torvalds /* now it is safe to insert this transaction on the main list */ 40821da177e4SLinus Torvalds list_add_tail(&jl->j_list, &journal->j_journal_list); 40831da177e4SLinus Torvalds list_add_tail(&jl->j_working_list, &journal->j_working_list); 40841da177e4SLinus Torvalds journal->j_num_work_lists++; 40851da177e4SLinus Torvalds 40861da177e4SLinus Torvalds /* reset journal values for the next transaction */ 40871da177e4SLinus Torvalds old_start = journal->j_start; 4088bd4c625cSLinus Torvalds journal->j_start = 4089bd4c625cSLinus Torvalds (journal->j_start + journal->j_len + 4090a9dd3643SJeff Mahoney 2) % SB_ONDISK_JOURNAL_SIZE(sb); 40911da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 40921da177e4SLinus Torvalds journal->j_bcount = 0; 40931da177e4SLinus Torvalds journal->j_last = NULL; 40941da177e4SLinus Torvalds journal->j_first = NULL; 40951da177e4SLinus Torvalds journal->j_len = 0; 40961da177e4SLinus Torvalds journal->j_trans_start_time = 0; 4097a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 4098a44c94a7SAlexander Zarochentsev if (++journal->j_trans_id == 0) 4099a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 41001da177e4SLinus Torvalds journal->j_current_jl->j_trans_id = journal->j_trans_id; 41011da177e4SLinus Torvalds journal->j_must_wait = 0; 41021da177e4SLinus Torvalds journal->j_len_alloc = 0; 41031da177e4SLinus Torvalds journal->j_next_full_flush = 0; 41041da177e4SLinus Torvalds journal->j_next_async_flush = 0; 4105a9dd3643SJeff Mahoney init_journal_hash(sb); 41061da177e4SLinus Torvalds 41071da177e4SLinus Torvalds // make sure reiserfs_add_jh sees the new current_jl before we 41081da177e4SLinus Torvalds // write out the tails 41091da177e4SLinus Torvalds smp_mb(); 41101da177e4SLinus Torvalds 41111da177e4SLinus Torvalds /* tail conversion 
targets have to hit the disk before we end the 41121da177e4SLinus Torvalds * transaction. Otherwise a later transaction might repack the tail 41131da177e4SLinus Torvalds * before this transaction commits, leaving the data block unflushed and 41141da177e4SLinus Torvalds * clean, if we crash before the later transaction commits, the data block 41151da177e4SLinus Torvalds * is lost. 41161da177e4SLinus Torvalds */ 41171da177e4SLinus Torvalds if (!list_empty(&jl->j_tail_bh_list)) { 4118278f6679SJeff Mahoney depth = reiserfs_write_unlock_nested(sb); 41191da177e4SLinus Torvalds write_ordered_buffers(&journal->j_dirty_buffers_lock, 41201da177e4SLinus Torvalds journal, jl, &jl->j_tail_bh_list); 4121278f6679SJeff Mahoney reiserfs_write_lock_nested(sb, depth); 41221da177e4SLinus Torvalds } 412314a61442SEric Sesterhenn BUG_ON(!list_empty(&jl->j_tail_bh_list)); 412490415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 41251da177e4SLinus Torvalds 41261da177e4SLinus Torvalds /* honor the flush wishes from the caller, simple commits can 41271da177e4SLinus Torvalds ** be done outside the journal lock, they are done below 41281da177e4SLinus Torvalds ** 41291da177e4SLinus Torvalds ** if we don't flush the commit list right now, we put it into 41301da177e4SLinus Torvalds ** the work queue so the people waiting on the async progress work 41311da177e4SLinus Torvalds ** queue don't wait for this proc to flush journal lists and such. 41321da177e4SLinus Torvalds */ 41331da177e4SLinus Torvalds if (flush) { 4134a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 4135a9dd3643SJeff Mahoney flush_journal_list(sb, jl, 1); 41361da177e4SLinus Torvalds } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 41371da177e4SLinus Torvalds queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); 41381da177e4SLinus Torvalds 41391da177e4SLinus Torvalds /* if the next transaction has any chance of wrapping, flush 41401da177e4SLinus Torvalds ** transactions that might get overwritten. 
If any journal lists are very 41411da177e4SLinus Torvalds ** old flush them as well. 41421da177e4SLinus Torvalds */ 41431da177e4SLinus Torvalds first_jl: 41441da177e4SLinus Torvalds list_for_each_safe(entry, safe, &journal->j_journal_list) { 41451da177e4SLinus Torvalds temp_jl = JOURNAL_LIST_ENTRY(entry); 41461da177e4SLinus Torvalds if (journal->j_start <= temp_jl->j_start) { 41471da177e4SLinus Torvalds if ((journal->j_start + journal->j_trans_max + 1) >= 4148bd4c625cSLinus Torvalds temp_jl->j_start) { 4149a9dd3643SJeff Mahoney flush_used_journal_lists(sb, temp_jl); 41501da177e4SLinus Torvalds goto first_jl; 41511da177e4SLinus Torvalds } else if ((journal->j_start + 41521da177e4SLinus Torvalds journal->j_trans_max + 1) < 4153a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) { 41541da177e4SLinus Torvalds /* if we don't cross into the next transaction and we don't 41551da177e4SLinus Torvalds * wrap, there is no way we can overlap any later transactions 41561da177e4SLinus Torvalds * break now 41571da177e4SLinus Torvalds */ 41581da177e4SLinus Torvalds break; 41591da177e4SLinus Torvalds } 41601da177e4SLinus Torvalds } else if ((journal->j_start + 41611da177e4SLinus Torvalds journal->j_trans_max + 1) > 4162a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) { 41631da177e4SLinus Torvalds if (((journal->j_start + journal->j_trans_max + 1) % 4164a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) >= 4165bd4c625cSLinus Torvalds temp_jl->j_start) { 4166a9dd3643SJeff Mahoney flush_used_journal_lists(sb, temp_jl); 41671da177e4SLinus Torvalds goto first_jl; 41681da177e4SLinus Torvalds } else { 41691da177e4SLinus Torvalds /* we don't overlap anything from out start to the end of the 41701da177e4SLinus Torvalds * log, and our wrapped portion doesn't overlap anything at 41711da177e4SLinus Torvalds * the start of the log. 
We can break 41721da177e4SLinus Torvalds */ 41731da177e4SLinus Torvalds break; 41741da177e4SLinus Torvalds } 41751da177e4SLinus Torvalds } 41761da177e4SLinus Torvalds } 41771da177e4SLinus Torvalds 4178bd4c625cSLinus Torvalds journal->j_current_jl->j_list_bitmap = 4179a9dd3643SJeff Mahoney get_list_bitmap(sb, journal->j_current_jl); 41801da177e4SLinus Torvalds 41811da177e4SLinus Torvalds if (!(journal->j_current_jl->j_list_bitmap)) { 4182a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1996", 4183c3a9c210SJeff Mahoney "could not get a list bitmap"); 41841da177e4SLinus Torvalds } 41851da177e4SLinus Torvalds 41861da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 4187a9dd3643SJeff Mahoney unlock_journal(sb); 41881da177e4SLinus Torvalds /* wake up any body waiting to join. */ 41891da177e4SLinus Torvalds clear_bit(J_WRITERS_QUEUED, &journal->j_state); 41901da177e4SLinus Torvalds wake_up(&(journal->j_join_wait)); 41911da177e4SLinus Torvalds 41921da177e4SLinus Torvalds if (!flush && wait_on_commit && 4193a9dd3643SJeff Mahoney journal_list_still_alive(sb, commit_trans_id)) { 4194a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 41951da177e4SLinus Torvalds } 41961da177e4SLinus Torvalds out: 4197a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal end2"); 41981da177e4SLinus Torvalds 41991da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 42001da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 42011da177e4SLinus Torvalds * persistent transactions there are. 
We need to do this so if this 42021da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 4203a9dd3643SJeff Mahoney th->t_super = sb; 42041da177e4SLinus Torvalds 42051da177e4SLinus Torvalds return journal->j_errno; 42061da177e4SLinus Torvalds } 42071da177e4SLinus Torvalds 420832e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */ 420932e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno) 42101da177e4SLinus Torvalds { 42111da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 42121da177e4SLinus Torvalds if (test_bit(J_ABORTED, &journal->j_state)) 42131da177e4SLinus Torvalds return; 42141da177e4SLinus Torvalds 421532e8b106SJeff Mahoney if (!journal->j_errno) 421632e8b106SJeff Mahoney journal->j_errno = errno; 42171da177e4SLinus Torvalds 42181da177e4SLinus Torvalds sb->s_flags |= MS_RDONLY; 42191da177e4SLinus Torvalds set_bit(J_ABORTED, &journal->j_state); 42201da177e4SLinus Torvalds 42211da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 42221da177e4SLinus Torvalds dump_stack(); 42231da177e4SLinus Torvalds #endif 42241da177e4SLinus Torvalds } 4225