1 /* 2 * linux/fs/jbd2/journal.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5 * 6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Generic filesystem journal-writing code; part of the ext2fs 13 * journaling system. 14 * 15 * This file manages journals: areas of disk reserved for logging 16 * transactional updates. This includes the kernel journaling thread 17 * which is responsible for scheduling updates to the log. 18 * 19 * We do not actually manage the physical storage of the journal in this 20 * file: that is left to a per-journal policy function, which allows us 21 * to store the journal within a filesystem-specified area for ext2 22 * journaling (ext2 can use a reserved inode for storing the log). 23 */ 24 25 #include <linux/module.h> 26 #include <linux/time.h> 27 #include <linux/fs.h> 28 #include <linux/jbd2.h> 29 #include <linux/errno.h> 30 #include <linux/slab.h> 31 #include <linux/init.h> 32 #include <linux/mm.h> 33 #include <linux/freezer.h> 34 #include <linux/pagemap.h> 35 #include <linux/kthread.h> 36 #include <linux/poison.h> 37 #include <linux/proc_fs.h> 38 #include <linux/seq_file.h> 39 #include <linux/math64.h> 40 #include <linux/hash.h> 41 #include <linux/log2.h> 42 #include <linux/vmalloc.h> 43 #include <linux/backing-dev.h> 44 #include <linux/bitops.h> 45 #include <linux/ratelimit.h> 46 47 #define CREATE_TRACE_POINTS 48 #include <trace/events/jbd2.h> 49 50 #include <asm/uaccess.h> 51 #include <asm/page.h> 52 53 #ifdef CONFIG_JBD2_DEBUG 54 ushort jbd2_journal_enable_debug __read_mostly; 55 EXPORT_SYMBOL(jbd2_journal_enable_debug); 56 57 module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644); 58 MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2"); 59 #endif 60 61 
EXPORT_SYMBOL(jbd2_journal_extend); 62 EXPORT_SYMBOL(jbd2_journal_stop); 63 EXPORT_SYMBOL(jbd2_journal_lock_updates); 64 EXPORT_SYMBOL(jbd2_journal_unlock_updates); 65 EXPORT_SYMBOL(jbd2_journal_get_write_access); 66 EXPORT_SYMBOL(jbd2_journal_get_create_access); 67 EXPORT_SYMBOL(jbd2_journal_get_undo_access); 68 EXPORT_SYMBOL(jbd2_journal_set_triggers); 69 EXPORT_SYMBOL(jbd2_journal_dirty_metadata); 70 EXPORT_SYMBOL(jbd2_journal_forget); 71 #if 0 72 EXPORT_SYMBOL(journal_sync_buffer); 73 #endif 74 EXPORT_SYMBOL(jbd2_journal_flush); 75 EXPORT_SYMBOL(jbd2_journal_revoke); 76 77 EXPORT_SYMBOL(jbd2_journal_init_dev); 78 EXPORT_SYMBOL(jbd2_journal_init_inode); 79 EXPORT_SYMBOL(jbd2_journal_check_used_features); 80 EXPORT_SYMBOL(jbd2_journal_check_available_features); 81 EXPORT_SYMBOL(jbd2_journal_set_features); 82 EXPORT_SYMBOL(jbd2_journal_load); 83 EXPORT_SYMBOL(jbd2_journal_destroy); 84 EXPORT_SYMBOL(jbd2_journal_abort); 85 EXPORT_SYMBOL(jbd2_journal_errno); 86 EXPORT_SYMBOL(jbd2_journal_ack_err); 87 EXPORT_SYMBOL(jbd2_journal_clear_err); 88 EXPORT_SYMBOL(jbd2_log_wait_commit); 89 EXPORT_SYMBOL(jbd2_log_start_commit); 90 EXPORT_SYMBOL(jbd2_journal_start_commit); 91 EXPORT_SYMBOL(jbd2_journal_force_commit_nested); 92 EXPORT_SYMBOL(jbd2_journal_wipe); 93 EXPORT_SYMBOL(jbd2_journal_blocks_per_page); 94 EXPORT_SYMBOL(jbd2_journal_invalidatepage); 95 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); 96 EXPORT_SYMBOL(jbd2_journal_force_commit); 97 EXPORT_SYMBOL(jbd2_journal_file_inode); 98 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 99 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 100 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 101 EXPORT_SYMBOL(jbd2_inode_cache); 102 103 static void __journal_abort_soft (journal_t *journal, int errno); 104 static int jbd2_journal_create_slab(size_t slab_size); 105 106 #ifdef CONFIG_JBD2_DEBUG 107 void __jbd2_debug(int level, const char *file, const char *func, 108 unsigned int line, const char *fmt, ...) 
109 { 110 struct va_format vaf; 111 va_list args; 112 113 if (level > jbd2_journal_enable_debug) 114 return; 115 va_start(args, fmt); 116 vaf.fmt = fmt; 117 vaf.va = &args; 118 printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); 119 va_end(args); 120 } 121 EXPORT_SYMBOL(__jbd2_debug); 122 #endif 123 124 /* Checksumming functions */ 125 int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 126 { 127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 128 return 1; 129 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 131 } 132 133 static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 134 { 135 __u32 csum; 136 __be32 old_csum; 137 138 old_csum = sb->s_checksum; 139 sb->s_checksum = 0; 140 csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); 141 sb->s_checksum = old_csum; 142 143 return cpu_to_be32(csum); 144 } 145 146 int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 147 { 148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 149 return 1; 150 151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 152 } 153 154 void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 155 { 156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 157 return; 158 159 sb->s_checksum = jbd2_superblock_csum(j, sb); 160 } 161 162 /* 163 * Helper function used to manage commit timeouts 164 */ 165 166 static void commit_timeout(unsigned long __data) 167 { 168 struct task_struct * p = (struct task_struct *) __data; 169 170 wake_up_process(p); 171 } 172 173 /* 174 * kjournald2: The main thread function used to manage a logging device 175 * journal. 176 * 177 * This kernel thread is responsible for two things: 178 * 179 * 1) COMMIT: Every so often we need to commit the current state of the 180 * filesystem to disk. The journal thread is responsible for writing 181 * all of the metadata buffers to disk. 
182 * 183 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all 184 * of the data in that part of the log has been rewritten elsewhere on 185 * the disk. Flushing these old buffers to reclaim space in the log is 186 * known as checkpointing, and this thread is responsible for that job. 187 */ 188 189 static int kjournald2(void *arg) 190 { 191 journal_t *journal = arg; 192 transaction_t *transaction; 193 194 /* 195 * Set up an interval timer which can be used to trigger a commit wakeup 196 * after the commit interval expires 197 */ 198 setup_timer(&journal->j_commit_timer, commit_timeout, 199 (unsigned long)current); 200 201 set_freezable(); 202 203 /* Record that the journal thread is running */ 204 journal->j_task = current; 205 wake_up(&journal->j_wait_done_commit); 206 207 /* 208 * And now, wait forever for commit wakeup events. 209 */ 210 write_lock(&journal->j_state_lock); 211 212 loop: 213 if (journal->j_flags & JBD2_UNMOUNT) 214 goto end_loop; 215 216 jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", 217 journal->j_commit_sequence, journal->j_commit_request); 218 219 if (journal->j_commit_sequence != journal->j_commit_request) { 220 jbd_debug(1, "OK, requests differ\n"); 221 write_unlock(&journal->j_state_lock); 222 del_timer_sync(&journal->j_commit_timer); 223 jbd2_journal_commit_transaction(journal); 224 write_lock(&journal->j_state_lock); 225 goto loop; 226 } 227 228 wake_up(&journal->j_wait_done_commit); 229 if (freezing(current)) { 230 /* 231 * The simpler the better. Flushing journal isn't a 232 * good idea, because that depends on threads that may 233 * be already stopped. 
234 */ 235 jbd_debug(1, "Now suspending kjournald2\n"); 236 write_unlock(&journal->j_state_lock); 237 try_to_freeze(); 238 write_lock(&journal->j_state_lock); 239 } else { 240 /* 241 * We assume on resume that commits are already there, 242 * so we don't sleep 243 */ 244 DEFINE_WAIT(wait); 245 int should_sleep = 1; 246 247 prepare_to_wait(&journal->j_wait_commit, &wait, 248 TASK_INTERRUPTIBLE); 249 if (journal->j_commit_sequence != journal->j_commit_request) 250 should_sleep = 0; 251 transaction = journal->j_running_transaction; 252 if (transaction && time_after_eq(jiffies, 253 transaction->t_expires)) 254 should_sleep = 0; 255 if (journal->j_flags & JBD2_UNMOUNT) 256 should_sleep = 0; 257 if (should_sleep) { 258 write_unlock(&journal->j_state_lock); 259 schedule(); 260 write_lock(&journal->j_state_lock); 261 } 262 finish_wait(&journal->j_wait_commit, &wait); 263 } 264 265 jbd_debug(1, "kjournald2 wakes\n"); 266 267 /* 268 * Were we woken up by a commit wakeup event? 269 */ 270 transaction = journal->j_running_transaction; 271 if (transaction && time_after_eq(jiffies, transaction->t_expires)) { 272 journal->j_commit_request = transaction->t_tid; 273 jbd_debug(1, "woke because of timeout\n"); 274 } 275 goto loop; 276 277 end_loop: 278 write_unlock(&journal->j_state_lock); 279 del_timer_sync(&journal->j_commit_timer); 280 journal->j_task = NULL; 281 wake_up(&journal->j_wait_done_commit); 282 jbd_debug(1, "Journal thread exiting.\n"); 283 return 0; 284 } 285 286 static int jbd2_journal_start_thread(journal_t *journal) 287 { 288 struct task_struct *t; 289 290 t = kthread_run(kjournald2, journal, "jbd2/%s", 291 journal->j_devname); 292 if (IS_ERR(t)) 293 return PTR_ERR(t); 294 295 wait_event(journal->j_wait_done_commit, journal->j_task != NULL); 296 return 0; 297 } 298 299 static void journal_kill_thread(journal_t *journal) 300 { 301 write_lock(&journal->j_state_lock); 302 journal->j_flags |= JBD2_UNMOUNT; 303 304 while (journal->j_task) { 305 
wake_up(&journal->j_wait_commit); 306 write_unlock(&journal->j_state_lock); 307 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 308 write_lock(&journal->j_state_lock); 309 } 310 write_unlock(&journal->j_state_lock); 311 } 312 313 /* 314 * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. 315 * 316 * Writes a metadata buffer to a given disk block. The actual IO is not 317 * performed but a new buffer_head is constructed which labels the data 318 * to be written with the correct destination disk block. 319 * 320 * Any magic-number escaping which needs to be done will cause a 321 * copy-out here. If the buffer happens to start with the 322 * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the 323 * magic number is only written to the log for descripter blocks. In 324 * this case, we copy the data and replace the first word with 0, and we 325 * return a result code which indicates that this buffer needs to be 326 * marked as an escaped buffer in the corresponding log descriptor 327 * block. The missing word can then be restored when the block is read 328 * during recovery. 329 * 330 * If the source buffer has already been modified by a new transaction 331 * since we took the last commit snapshot, we use the frozen copy of 332 * that data for IO. If we end up using the existing buffer_head's data 333 * for the write, then we have to make sure nobody modifies it while the 334 * IO is in progress. do_get_write_access() handles this. 335 * 336 * The function returns a pointer to the buffer_head to be used for IO. 
337 * 338 * 339 * Return value: 340 * <0: Error 341 * >=0: Finished OK 342 * 343 * On success: 344 * Bit 0 set == escape performed on the data 345 * Bit 1 set == buffer copy-out performed (kfree the data after IO) 346 */ 347 348 int jbd2_journal_write_metadata_buffer(transaction_t *transaction, 349 struct journal_head *jh_in, 350 struct buffer_head **bh_out, 351 sector_t blocknr) 352 { 353 int need_copy_out = 0; 354 int done_copy_out = 0; 355 int do_escape = 0; 356 char *mapped_data; 357 struct buffer_head *new_bh; 358 struct page *new_page; 359 unsigned int new_offset; 360 struct buffer_head *bh_in = jh2bh(jh_in); 361 journal_t *journal = transaction->t_journal; 362 363 /* 364 * The buffer really shouldn't be locked: only the current committing 365 * transaction is allowed to write it, so nobody else is allowed 366 * to do any IO. 367 * 368 * akpm: except if we're journalling data, and write() output is 369 * also part of a shared mapping, and another thread has 370 * decided to launch a writepage() against this buffer. 371 */ 372 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 373 374 retry_alloc: 375 new_bh = alloc_buffer_head(GFP_NOFS); 376 if (!new_bh) { 377 /* 378 * Failure is not an option, but __GFP_NOFAIL is going 379 * away; so we retry ourselves here. 380 */ 381 congestion_wait(BLK_RW_ASYNC, HZ/50); 382 goto retry_alloc; 383 } 384 385 /* keep subsequent assertions sane */ 386 atomic_set(&new_bh->b_count, 1); 387 388 jbd_lock_bh_state(bh_in); 389 repeat: 390 /* 391 * If a new transaction has already done a buffer copy-out, then 392 * we use that version of the data for the commit. 
393 */ 394 if (jh_in->b_frozen_data) { 395 done_copy_out = 1; 396 new_page = virt_to_page(jh_in->b_frozen_data); 397 new_offset = offset_in_page(jh_in->b_frozen_data); 398 } else { 399 new_page = jh2bh(jh_in)->b_page; 400 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 401 } 402 403 mapped_data = kmap_atomic(new_page); 404 /* 405 * Fire data frozen trigger if data already wasn't frozen. Do this 406 * before checking for escaping, as the trigger may modify the magic 407 * offset. If a copy-out happens afterwards, it will have the correct 408 * data in the buffer. 409 */ 410 if (!done_copy_out) 411 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, 412 jh_in->b_triggers); 413 414 /* 415 * Check for escaping 416 */ 417 if (*((__be32 *)(mapped_data + new_offset)) == 418 cpu_to_be32(JBD2_MAGIC_NUMBER)) { 419 need_copy_out = 1; 420 do_escape = 1; 421 } 422 kunmap_atomic(mapped_data); 423 424 /* 425 * Do we need to do a data copy? 426 */ 427 if (need_copy_out && !done_copy_out) { 428 char *tmp; 429 430 jbd_unlock_bh_state(bh_in); 431 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 432 if (!tmp) { 433 brelse(new_bh); 434 return -ENOMEM; 435 } 436 jbd_lock_bh_state(bh_in); 437 if (jh_in->b_frozen_data) { 438 jbd2_free(tmp, bh_in->b_size); 439 goto repeat; 440 } 441 442 jh_in->b_frozen_data = tmp; 443 mapped_data = kmap_atomic(new_page); 444 memcpy(tmp, mapped_data + new_offset, bh_in->b_size); 445 kunmap_atomic(mapped_data); 446 447 new_page = virt_to_page(tmp); 448 new_offset = offset_in_page(tmp); 449 done_copy_out = 1; 450 451 /* 452 * This isn't strictly necessary, as we're using frozen 453 * data for the escaping, but it keeps consistency with 454 * b_frozen_data usage. 455 */ 456 jh_in->b_frozen_triggers = jh_in->b_triggers; 457 } 458 459 /* 460 * Did we need to do an escaping? Now we've done all the 461 * copying, we can finally do so. 
462 */ 463 if (do_escape) { 464 mapped_data = kmap_atomic(new_page); 465 *((unsigned int *)(mapped_data + new_offset)) = 0; 466 kunmap_atomic(mapped_data); 467 } 468 469 set_bh_page(new_bh, new_page, new_offset); 470 new_bh->b_size = bh_in->b_size; 471 new_bh->b_bdev = journal->j_dev; 472 new_bh->b_blocknr = blocknr; 473 new_bh->b_private = bh_in; 474 set_buffer_mapped(new_bh); 475 set_buffer_dirty(new_bh); 476 477 *bh_out = new_bh; 478 479 /* 480 * The to-be-written buffer needs to get moved to the io queue, 481 * and the original buffer whose contents we are shadowing or 482 * copying is moved to the transaction's shadow queue. 483 */ 484 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 485 spin_lock(&journal->j_list_lock); 486 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 487 spin_unlock(&journal->j_list_lock); 488 set_buffer_shadow(bh_in); 489 jbd_unlock_bh_state(bh_in); 490 491 return do_escape | (done_copy_out << 1); 492 } 493 494 /* 495 * Allocation code for the journal file. Manage the space left in the 496 * journal, so that we can begin checkpointing when appropriate. 497 */ 498 499 /* 500 * Called with j_state_lock locked for writing. 501 * Returns true if a transaction commit was started. 502 */ 503 int __jbd2_log_start_commit(journal_t *journal, tid_t target) 504 { 505 /* Return if the txn has already requested to be committed */ 506 if (journal->j_commit_request == target) 507 return 0; 508 509 /* 510 * The only transaction we can possibly wait upon is the 511 * currently running transaction (if it exists). Otherwise, 512 * the target tid must be an old one. 513 */ 514 if (journal->j_running_transaction && 515 journal->j_running_transaction->t_tid == target) { 516 /* 517 * We want a new commit: OK, mark the request and wakeup the 518 * commit thread. We do _not_ do the commit ourselves. 
519 */ 520 521 journal->j_commit_request = target; 522 jbd_debug(1, "JBD2: requesting commit %d/%d\n", 523 journal->j_commit_request, 524 journal->j_commit_sequence); 525 journal->j_running_transaction->t_requested = jiffies; 526 wake_up(&journal->j_wait_commit); 527 return 1; 528 } else if (!tid_geq(journal->j_commit_request, target)) 529 /* This should never happen, but if it does, preserve 530 the evidence before kjournald goes into a loop and 531 increments j_commit_sequence beyond all recognition. */ 532 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", 533 journal->j_commit_request, 534 journal->j_commit_sequence, 535 target, journal->j_running_transaction ? 536 journal->j_running_transaction->t_tid : 0); 537 return 0; 538 } 539 540 int jbd2_log_start_commit(journal_t *journal, tid_t tid) 541 { 542 int ret; 543 544 write_lock(&journal->j_state_lock); 545 ret = __jbd2_log_start_commit(journal, tid); 546 write_unlock(&journal->j_state_lock); 547 return ret; 548 } 549 550 /* 551 * Force and wait any uncommitted transactions. We can only force the running 552 * transaction if we don't have an active handle, otherwise, we will deadlock. 553 * Returns: <0 in case of error, 554 * 0 if nothing to commit, 555 * 1 if transaction was successfully committed. 
556 */ 557 static int __jbd2_journal_force_commit(journal_t *journal) 558 { 559 transaction_t *transaction = NULL; 560 tid_t tid; 561 int need_to_start = 0, ret = 0; 562 563 read_lock(&journal->j_state_lock); 564 if (journal->j_running_transaction && !current->journal_info) { 565 transaction = journal->j_running_transaction; 566 if (!tid_geq(journal->j_commit_request, transaction->t_tid)) 567 need_to_start = 1; 568 } else if (journal->j_committing_transaction) 569 transaction = journal->j_committing_transaction; 570 571 if (!transaction) { 572 /* Nothing to commit */ 573 read_unlock(&journal->j_state_lock); 574 return 0; 575 } 576 tid = transaction->t_tid; 577 read_unlock(&journal->j_state_lock); 578 if (need_to_start) 579 jbd2_log_start_commit(journal, tid); 580 ret = jbd2_log_wait_commit(journal, tid); 581 if (!ret) 582 ret = 1; 583 584 return ret; 585 } 586 587 /** 588 * Force and wait upon a commit if the calling process is not within 589 * transaction. This is used for forcing out undo-protected data which contains 590 * bitmaps, when the fs is running out of space. 591 * 592 * @journal: journal to force 593 * Returns true if progress was made. 594 */ 595 int jbd2_journal_force_commit_nested(journal_t *journal) 596 { 597 int ret; 598 599 ret = __jbd2_journal_force_commit(journal); 600 return ret > 0; 601 } 602 603 /** 604 * int journal_force_commit() - force any uncommitted transactions 605 * @journal: journal to force 606 * 607 * Caller want unconditional commit. We can only force the running transaction 608 * if we don't have an active handle, otherwise, we will deadlock. 609 */ 610 int jbd2_journal_force_commit(journal_t *journal) 611 { 612 int ret; 613 614 J_ASSERT(!current->journal_info); 615 ret = __jbd2_journal_force_commit(journal); 616 if (ret > 0) 617 ret = 0; 618 return ret; 619 } 620 621 /* 622 * Start a commit of the current running transaction (if any). 
Returns true 623 * if a transaction is going to be committed (or is currently already 624 * committing), and fills its tid in at *ptid 625 */ 626 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 627 { 628 int ret = 0; 629 630 write_lock(&journal->j_state_lock); 631 if (journal->j_running_transaction) { 632 tid_t tid = journal->j_running_transaction->t_tid; 633 634 __jbd2_log_start_commit(journal, tid); 635 /* There's a running transaction and we've just made sure 636 * it's commit has been scheduled. */ 637 if (ptid) 638 *ptid = tid; 639 ret = 1; 640 } else if (journal->j_committing_transaction) { 641 /* 642 * If commit has been started, then we have to wait for 643 * completion of that transaction. 644 */ 645 if (ptid) 646 *ptid = journal->j_committing_transaction->t_tid; 647 ret = 1; 648 } 649 write_unlock(&journal->j_state_lock); 650 return ret; 651 } 652 653 /* 654 * Return 1 if a given transaction has not yet sent barrier request 655 * connected with a transaction commit. If 0 is returned, transaction 656 * may or may not have sent the barrier. Used to avoid sending barrier 657 * twice in common cases. 658 */ 659 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) 660 { 661 int ret = 0; 662 transaction_t *commit_trans; 663 664 if (!(journal->j_flags & JBD2_BARRIER)) 665 return 0; 666 read_lock(&journal->j_state_lock); 667 /* Transaction already committed? */ 668 if (tid_geq(journal->j_commit_sequence, tid)) 669 goto out; 670 commit_trans = journal->j_committing_transaction; 671 if (!commit_trans || commit_trans->t_tid != tid) { 672 ret = 1; 673 goto out; 674 } 675 /* 676 * Transaction is being committed and we already proceeded to 677 * submitting a flush to fs partition? 
678 */ 679 if (journal->j_fs_dev != journal->j_dev) { 680 if (!commit_trans->t_need_data_flush || 681 commit_trans->t_state >= T_COMMIT_DFLUSH) 682 goto out; 683 } else { 684 if (commit_trans->t_state >= T_COMMIT_JFLUSH) 685 goto out; 686 } 687 ret = 1; 688 out: 689 read_unlock(&journal->j_state_lock); 690 return ret; 691 } 692 EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); 693 694 /* 695 * Wait for a specified commit to complete. 696 * The caller may not hold the journal lock. 697 */ 698 int jbd2_log_wait_commit(journal_t *journal, tid_t tid) 699 { 700 int err = 0; 701 702 read_lock(&journal->j_state_lock); 703 #ifdef CONFIG_JBD2_DEBUG 704 if (!tid_geq(journal->j_commit_request, tid)) { 705 printk(KERN_EMERG 706 "%s: error: j_commit_request=%d, tid=%d\n", 707 __func__, journal->j_commit_request, tid); 708 } 709 #endif 710 while (tid_gt(tid, journal->j_commit_sequence)) { 711 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", 712 tid, journal->j_commit_sequence); 713 wake_up(&journal->j_wait_commit); 714 read_unlock(&journal->j_state_lock); 715 wait_event(journal->j_wait_done_commit, 716 !tid_gt(tid, journal->j_commit_sequence)); 717 read_lock(&journal->j_state_lock); 718 } 719 read_unlock(&journal->j_state_lock); 720 721 if (unlikely(is_journal_aborted(journal))) { 722 printk(KERN_EMERG "journal commit I/O error\n"); 723 err = -EIO; 724 } 725 return err; 726 } 727 728 /* 729 * When this function returns the transaction corresponding to tid 730 * will be completed. If the transaction has currently running, start 731 * committing that transaction before waiting for it to complete. If 732 * the transaction id is stale, it is by definition already completed, 733 * so just return SUCCESS. 
734 */ 735 int jbd2_complete_transaction(journal_t *journal, tid_t tid) 736 { 737 int need_to_wait = 1; 738 739 read_lock(&journal->j_state_lock); 740 if (journal->j_running_transaction && 741 journal->j_running_transaction->t_tid == tid) { 742 if (journal->j_commit_request != tid) { 743 /* transaction not yet started, so request it */ 744 read_unlock(&journal->j_state_lock); 745 jbd2_log_start_commit(journal, tid); 746 goto wait_commit; 747 } 748 } else if (!(journal->j_committing_transaction && 749 journal->j_committing_transaction->t_tid == tid)) 750 need_to_wait = 0; 751 read_unlock(&journal->j_state_lock); 752 if (!need_to_wait) 753 return 0; 754 wait_commit: 755 return jbd2_log_wait_commit(journal, tid); 756 } 757 EXPORT_SYMBOL(jbd2_complete_transaction); 758 759 /* 760 * Log buffer allocation routines: 761 */ 762 763 int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) 764 { 765 unsigned long blocknr; 766 767 write_lock(&journal->j_state_lock); 768 J_ASSERT(journal->j_free > 1); 769 770 blocknr = journal->j_head; 771 journal->j_head++; 772 journal->j_free--; 773 if (journal->j_head == journal->j_last) 774 journal->j_head = journal->j_first; 775 write_unlock(&journal->j_state_lock); 776 return jbd2_journal_bmap(journal, blocknr, retp); 777 } 778 779 /* 780 * Conversion of logical to physical block numbers for the journal 781 * 782 * On external journals the journal blocks are identity-mapped, so 783 * this is a no-op. If needed, we can use j_blk_offset - everything is 784 * ready. 
785 */ 786 int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, 787 unsigned long long *retp) 788 { 789 int err = 0; 790 unsigned long long ret; 791 792 if (journal->j_inode) { 793 ret = bmap(journal->j_inode, blocknr); 794 if (ret) 795 *retp = ret; 796 else { 797 printk(KERN_ALERT "%s: journal block not found " 798 "at offset %lu on %s\n", 799 __func__, blocknr, journal->j_devname); 800 err = -EIO; 801 __journal_abort_soft(journal, err); 802 } 803 } else { 804 *retp = blocknr; /* +journal->j_blk_offset */ 805 } 806 return err; 807 } 808 809 /* 810 * We play buffer_head aliasing tricks to write data/metadata blocks to 811 * the journal without copying their contents, but for journal 812 * descriptor blocks we do need to generate bona fide buffers. 813 * 814 * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying 815 * the buffer's contents they really should run flush_dcache_page(bh->b_page). 816 * But we don't bother doing that, so there will be coherency problems with 817 * mmaps of blockdevs which hold live JBD-controlled filesystems. 818 */ 819 struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) 820 { 821 struct buffer_head *bh; 822 unsigned long long blocknr; 823 int err; 824 825 err = jbd2_journal_next_log_block(journal, &blocknr); 826 827 if (err) 828 return NULL; 829 830 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 831 if (!bh) 832 return NULL; 833 lock_buffer(bh); 834 memset(bh->b_data, 0, journal->j_blocksize); 835 set_buffer_uptodate(bh); 836 unlock_buffer(bh); 837 BUFFER_TRACE(bh, "return this buffer"); 838 return bh; 839 } 840 841 /* 842 * Return tid of the oldest transaction in the journal and block in the journal 843 * where the transaction starts. 844 * 845 * If the journal is now empty, return which will be the next transaction ID 846 * we will write and where will that transaction start. 
847 * 848 * The return value is 0 if journal tail cannot be pushed any further, 1 if 849 * it can. 850 */ 851 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 852 unsigned long *block) 853 { 854 transaction_t *transaction; 855 int ret; 856 857 read_lock(&journal->j_state_lock); 858 spin_lock(&journal->j_list_lock); 859 transaction = journal->j_checkpoint_transactions; 860 if (transaction) { 861 *tid = transaction->t_tid; 862 *block = transaction->t_log_start; 863 } else if ((transaction = journal->j_committing_transaction) != NULL) { 864 *tid = transaction->t_tid; 865 *block = transaction->t_log_start; 866 } else if ((transaction = journal->j_running_transaction) != NULL) { 867 *tid = transaction->t_tid; 868 *block = journal->j_head; 869 } else { 870 *tid = journal->j_transaction_sequence; 871 *block = journal->j_head; 872 } 873 ret = tid_gt(*tid, journal->j_tail_sequence); 874 spin_unlock(&journal->j_list_lock); 875 read_unlock(&journal->j_state_lock); 876 877 return ret; 878 } 879 880 /* 881 * Update information in journal structure and in on disk journal superblock 882 * about log tail. This function does not check whether information passed in 883 * really pushes log tail further. It's responsibility of the caller to make 884 * sure provided log tail information is valid (e.g. by holding 885 * j_checkpoint_mutex all the time between computing log tail and calling this 886 * function as is the case with jbd2_cleanup_journal_tail()). 887 * 888 * Requires j_checkpoint_mutex 889 */ 890 void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 891 { 892 unsigned long freed; 893 894 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 895 896 /* 897 * We cannot afford for write to remain in drive's caches since as 898 * soon as we update j_tail, next transaction can start reusing journal 899 * space and if we lose sb update during power failure we'd replay 900 * old transaction with possibly newly overwritten data. 
901 */ 902 jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); 903 write_lock(&journal->j_state_lock); 904 freed = block - journal->j_tail; 905 if (block < journal->j_tail) 906 freed += journal->j_last - journal->j_first; 907 908 trace_jbd2_update_log_tail(journal, tid, block, freed); 909 jbd_debug(1, 910 "Cleaning journal tail from %d to %d (offset %lu), " 911 "freeing %lu\n", 912 journal->j_tail_sequence, tid, block, freed); 913 914 journal->j_free += freed; 915 journal->j_tail_sequence = tid; 916 journal->j_tail = block; 917 write_unlock(&journal->j_state_lock); 918 } 919 920 /* 921 * This is a variaon of __jbd2_update_log_tail which checks for validity of 922 * provided log tail and locks j_checkpoint_mutex. So it is safe against races 923 * with other threads updating log tail. 924 */ 925 void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 926 { 927 mutex_lock(&journal->j_checkpoint_mutex); 928 if (tid_gt(tid, journal->j_tail_sequence)) 929 __jbd2_update_log_tail(journal, tid, block); 930 mutex_unlock(&journal->j_checkpoint_mutex); 931 } 932 933 struct jbd2_stats_proc_session { 934 journal_t *journal; 935 struct transaction_stats_s *stats; 936 int start; 937 int max; 938 }; 939 940 static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) 941 { 942 return *pos ? 
NULL : SEQ_START_TOKEN; 943 } 944 945 static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) 946 { 947 return NULL; 948 } 949 950 static int jbd2_seq_info_show(struct seq_file *seq, void *v) 951 { 952 struct jbd2_stats_proc_session *s = seq->private; 953 954 if (v != SEQ_START_TOKEN) 955 return 0; 956 seq_printf(seq, "%lu transactions (%lu requested), " 957 "each up to %u blocks\n", 958 s->stats->ts_tid, s->stats->ts_requested, 959 s->journal->j_max_transaction_buffers); 960 if (s->stats->ts_tid == 0) 961 return 0; 962 seq_printf(seq, "average: \n %ums waiting for transaction\n", 963 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 964 seq_printf(seq, " %ums request delay\n", 965 (s->stats->ts_requested == 0) ? 0 : 966 jiffies_to_msecs(s->stats->run.rs_request_delay / 967 s->stats->ts_requested)); 968 seq_printf(seq, " %ums running transaction\n", 969 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 970 seq_printf(seq, " %ums transaction was being locked\n", 971 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid)); 972 seq_printf(seq, " %ums flushing data (in ordered mode)\n", 973 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid)); 974 seq_printf(seq, " %ums logging transaction\n", 975 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid)); 976 seq_printf(seq, " %lluus average transaction commit time\n", 977 div_u64(s->journal->j_average_commit_time, 1000)); 978 seq_printf(seq, " %lu handles per transaction\n", 979 s->stats->run.rs_handle_count / s->stats->ts_tid); 980 seq_printf(seq, " %lu blocks per transaction\n", 981 s->stats->run.rs_blocks / s->stats->ts_tid); 982 seq_printf(seq, " %lu logged blocks per transaction\n", 983 s->stats->run.rs_blocks_logged / s->stats->ts_tid); 984 return 0; 985 } 986 987 static void jbd2_seq_info_stop(struct seq_file *seq, void *v) 988 { 989 } 990 991 static const struct seq_operations jbd2_seq_info_ops = { 992 .start = jbd2_seq_info_start, 993 .next = 
jbd2_seq_info_next, 994 .stop = jbd2_seq_info_stop, 995 .show = jbd2_seq_info_show, 996 }; 997 998 static int jbd2_seq_info_open(struct inode *inode, struct file *file) 999 { 1000 journal_t *journal = PDE_DATA(inode); 1001 struct jbd2_stats_proc_session *s; 1002 int rc, size; 1003 1004 s = kmalloc(sizeof(*s), GFP_KERNEL); 1005 if (s == NULL) 1006 return -ENOMEM; 1007 size = sizeof(struct transaction_stats_s); 1008 s->stats = kmalloc(size, GFP_KERNEL); 1009 if (s->stats == NULL) { 1010 kfree(s); 1011 return -ENOMEM; 1012 } 1013 spin_lock(&journal->j_history_lock); 1014 memcpy(s->stats, &journal->j_stats, size); 1015 s->journal = journal; 1016 spin_unlock(&journal->j_history_lock); 1017 1018 rc = seq_open(file, &jbd2_seq_info_ops); 1019 if (rc == 0) { 1020 struct seq_file *m = file->private_data; 1021 m->private = s; 1022 } else { 1023 kfree(s->stats); 1024 kfree(s); 1025 } 1026 return rc; 1027 1028 } 1029 1030 static int jbd2_seq_info_release(struct inode *inode, struct file *file) 1031 { 1032 struct seq_file *seq = file->private_data; 1033 struct jbd2_stats_proc_session *s = seq->private; 1034 kfree(s->stats); 1035 kfree(s); 1036 return seq_release(inode, file); 1037 } 1038 1039 static const struct file_operations jbd2_seq_info_fops = { 1040 .owner = THIS_MODULE, 1041 .open = jbd2_seq_info_open, 1042 .read = seq_read, 1043 .llseek = seq_lseek, 1044 .release = jbd2_seq_info_release, 1045 }; 1046 1047 static struct proc_dir_entry *proc_jbd2_stats; 1048 1049 static void jbd2_stats_proc_init(journal_t *journal) 1050 { 1051 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); 1052 if (journal->j_proc_entry) { 1053 proc_create_data("info", S_IRUGO, journal->j_proc_entry, 1054 &jbd2_seq_info_fops, journal); 1055 } 1056 } 1057 1058 static void jbd2_stats_proc_exit(journal_t *journal) 1059 { 1060 remove_proc_entry("info", journal->j_proc_entry); 1061 remove_proc_entry(journal->j_devname, proc_jbd2_stats); 1062 } 1063 1064 /* 1065 * Management for journal 
control blocks: functions to create and
 * destroy journal_t structures, and to initialise and read existing
 * journal blocks from disk.  */

/* First: create and setup a journal_t object in memory.  We initialise
 * very few fields yet: that has to wait until we have created the
 * journal structures from scratch, or loaded them from disk.
 *
 * Returns the new journal, or NULL if either the journal_t or its
 * revoke table could not be allocated. */

static journal_t * journal_init_common (void)
{
	journal_t *journal = kzalloc(sizeof(*journal), GFP_KERNEL);

	if (!journal)
		return NULL;

	/* Wait queues */
	init_waitqueue_head(&journal->j_wait_transaction_locked);
	init_waitqueue_head(&journal->j_wait_done_commit);
	init_waitqueue_head(&journal->j_wait_commit);
	init_waitqueue_head(&journal->j_wait_updates);
	init_waitqueue_head(&journal->j_wait_reserved);

	/* Locks */
	mutex_init(&journal->j_barrier);
	mutex_init(&journal->j_checkpoint_mutex);
	spin_lock_init(&journal->j_revoke_lock);
	spin_lock_init(&journal->j_list_lock);
	spin_lock_init(&journal->j_history_lock);
	rwlock_init(&journal->j_state_lock);

	/* Default tunables */
	journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
	journal->j_min_batch_time = 0;
	journal->j_max_batch_time = 15000; /* 15ms */
	atomic_set(&journal->j_reserved_credits, 0);

	/* The journal is marked for error until we succeed with recovery! */
	journal->j_flags = JBD2_ABORT;

	/* Set up a default-sized revoke table for the new mount. */
	if (jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH)) {
		kfree(journal);
		return NULL;
	}

	return journal;
}

/* jbd2_journal_init_dev and jbd2_journal_init_inode:
 *
 * Create a journal structure assigned some fixed set of disk blocks to
 * the journal.
We don't actually touch those disk blocks yet, but we 1117 * need to set up all of the mapping information to tell the journaling 1118 * system where the journal blocks are. 1119 * 1120 */ 1121 1122 /** 1123 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure 1124 * @bdev: Block device on which to create the journal 1125 * @fs_dev: Device which hold journalled filesystem for this journal. 1126 * @start: Block nr Start of journal. 1127 * @len: Length of the journal in blocks. 1128 * @blocksize: blocksize of journalling device 1129 * 1130 * Returns: a newly created journal_t * 1131 * 1132 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1133 * range of blocks on an arbitrary block device. 1134 * 1135 */ 1136 journal_t * jbd2_journal_init_dev(struct block_device *bdev, 1137 struct block_device *fs_dev, 1138 unsigned long long start, int len, int blocksize) 1139 { 1140 journal_t *journal = journal_init_common(); 1141 struct buffer_head *bh; 1142 char *p; 1143 int n; 1144 1145 if (!journal) 1146 return NULL; 1147 1148 /* journal descriptor can store up to n blocks -bzzz */ 1149 journal->j_blocksize = blocksize; 1150 journal->j_dev = bdev; 1151 journal->j_fs_dev = fs_dev; 1152 journal->j_blk_offset = start; 1153 journal->j_maxlen = len; 1154 bdevname(journal->j_dev, journal->j_devname); 1155 p = journal->j_devname; 1156 while ((p = strchr(p, '/'))) 1157 *p = '!'; 1158 jbd2_stats_proc_init(journal); 1159 n = journal->j_blocksize / sizeof(journal_block_tag_t); 1160 journal->j_wbufsize = n; 1161 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1162 if (!journal->j_wbuf) { 1163 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", 1164 __func__); 1165 goto out_err; 1166 } 1167 1168 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 1169 if (!bh) { 1170 printk(KERN_ERR 1171 "%s: Cannot get buffer for journal superblock\n", 1172 __func__); 1173 goto out_err; 1174 } 1175 
journal->j_sb_buffer = bh; 1176 journal->j_superblock = (journal_superblock_t *)bh->b_data; 1177 1178 return journal; 1179 out_err: 1180 kfree(journal->j_wbuf); 1181 jbd2_stats_proc_exit(journal); 1182 kfree(journal); 1183 return NULL; 1184 } 1185 1186 /** 1187 * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. 1188 * @inode: An inode to create the journal in 1189 * 1190 * jbd2_journal_init_inode creates a journal which maps an on-disk inode as 1191 * the journal. The inode must exist already, must support bmap() and 1192 * must have all data blocks preallocated. 1193 */ 1194 journal_t * jbd2_journal_init_inode (struct inode *inode) 1195 { 1196 struct buffer_head *bh; 1197 journal_t *journal = journal_init_common(); 1198 char *p; 1199 int err; 1200 int n; 1201 unsigned long long blocknr; 1202 1203 if (!journal) 1204 return NULL; 1205 1206 journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; 1207 journal->j_inode = inode; 1208 bdevname(journal->j_dev, journal->j_devname); 1209 p = journal->j_devname; 1210 while ((p = strchr(p, '/'))) 1211 *p = '!'; 1212 p = journal->j_devname + strlen(journal->j_devname); 1213 sprintf(p, "-%lu", journal->j_inode->i_ino); 1214 jbd_debug(1, 1215 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 1216 journal, inode->i_sb->s_id, inode->i_ino, 1217 (long long) inode->i_size, 1218 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 1219 1220 journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; 1221 journal->j_blocksize = inode->i_sb->s_blocksize; 1222 jbd2_stats_proc_init(journal); 1223 1224 /* journal descriptor can store up to n blocks -bzzz */ 1225 n = journal->j_blocksize / sizeof(journal_block_tag_t); 1226 journal->j_wbufsize = n; 1227 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1228 if (!journal->j_wbuf) { 1229 printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", 1230 __func__); 1231 goto out_err; 1232 } 1233 1234 err = 
jbd2_journal_bmap(journal, 0, &blocknr); 1235 /* If that failed, give up */ 1236 if (err) { 1237 printk(KERN_ERR "%s: Cannot locate journal superblock\n", 1238 __func__); 1239 goto out_err; 1240 } 1241 1242 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 1243 if (!bh) { 1244 printk(KERN_ERR 1245 "%s: Cannot get buffer for journal superblock\n", 1246 __func__); 1247 goto out_err; 1248 } 1249 journal->j_sb_buffer = bh; 1250 journal->j_superblock = (journal_superblock_t *)bh->b_data; 1251 1252 return journal; 1253 out_err: 1254 kfree(journal->j_wbuf); 1255 jbd2_stats_proc_exit(journal); 1256 kfree(journal); 1257 return NULL; 1258 } 1259 1260 /* 1261 * If the journal init or create aborts, we need to mark the journal 1262 * superblock as being NULL to prevent the journal destroy from writing 1263 * back a bogus superblock. 1264 */ 1265 static void journal_fail_superblock (journal_t *journal) 1266 { 1267 struct buffer_head *bh = journal->j_sb_buffer; 1268 brelse(bh); 1269 journal->j_sb_buffer = NULL; 1270 } 1271 1272 /* 1273 * Given a journal_t structure, initialise the various fields for 1274 * startup of a new journaling session. We use this both when creating 1275 * a journal, and after recovering an old journal to reset it for 1276 * subsequent use. 
1277 */ 1278 1279 static int journal_reset(journal_t *journal) 1280 { 1281 journal_superblock_t *sb = journal->j_superblock; 1282 unsigned long long first, last; 1283 1284 first = be32_to_cpu(sb->s_first); 1285 last = be32_to_cpu(sb->s_maxlen); 1286 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1287 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", 1288 first, last); 1289 journal_fail_superblock(journal); 1290 return -EINVAL; 1291 } 1292 1293 journal->j_first = first; 1294 journal->j_last = last; 1295 1296 journal->j_head = first; 1297 journal->j_tail = first; 1298 journal->j_free = last - first; 1299 1300 journal->j_tail_sequence = journal->j_transaction_sequence; 1301 journal->j_commit_sequence = journal->j_transaction_sequence - 1; 1302 journal->j_commit_request = journal->j_commit_sequence; 1303 1304 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1305 1306 /* 1307 * As a special case, if the on-disk copy is already marked as needing 1308 * no recovery (s_start == 0), then we can safely defer the superblock 1309 * update until the next commit by setting JBD2_FLUSHED. This avoids 1310 * attempting a write to a potential-readonly device. 1311 */ 1312 if (sb->s_start == 0) { 1313 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1314 "(start %ld, seq %d, errno %d)\n", 1315 journal->j_tail, journal->j_tail_sequence, 1316 journal->j_errno); 1317 journal->j_flags |= JBD2_FLUSHED; 1318 } else { 1319 /* Lock here to make assertions happy... */ 1320 mutex_lock(&journal->j_checkpoint_mutex); 1321 /* 1322 * Update log tail information. We use WRITE_FUA since new 1323 * transaction will start reusing journal space and so we 1324 * must make sure information about current log tail is on 1325 * disk before that. 
1326 */ 1327 jbd2_journal_update_sb_log_tail(journal, 1328 journal->j_tail_sequence, 1329 journal->j_tail, 1330 WRITE_FUA); 1331 mutex_unlock(&journal->j_checkpoint_mutex); 1332 } 1333 return jbd2_journal_start_thread(journal); 1334 } 1335 1336 static void jbd2_write_superblock(journal_t *journal, int write_op) 1337 { 1338 struct buffer_head *bh = journal->j_sb_buffer; 1339 journal_superblock_t *sb = journal->j_superblock; 1340 int ret; 1341 1342 trace_jbd2_write_superblock(journal, write_op); 1343 if (!(journal->j_flags & JBD2_BARRIER)) 1344 write_op &= ~(REQ_FUA | REQ_FLUSH); 1345 lock_buffer(bh); 1346 if (buffer_write_io_error(bh)) { 1347 /* 1348 * Oh, dear. A previous attempt to write the journal 1349 * superblock failed. This could happen because the 1350 * USB device was yanked out. Or it could happen to 1351 * be a transient write error and maybe the block will 1352 * be remapped. Nothing we can do but to retry the 1353 * write and hope for the best. 1354 */ 1355 printk(KERN_ERR "JBD2: previous I/O error detected " 1356 "for journal superblock update for %s.\n", 1357 journal->j_devname); 1358 clear_buffer_write_io_error(bh); 1359 set_buffer_uptodate(bh); 1360 } 1361 jbd2_superblock_csum_set(journal, sb); 1362 get_bh(bh); 1363 bh->b_end_io = end_buffer_write_sync; 1364 ret = submit_bh(write_op, bh); 1365 wait_on_buffer(bh); 1366 if (buffer_write_io_error(bh)) { 1367 clear_buffer_write_io_error(bh); 1368 set_buffer_uptodate(bh); 1369 ret = -EIO; 1370 } 1371 if (ret) { 1372 printk(KERN_ERR "JBD2: Error %d detected when updating " 1373 "journal superblock for %s.\n", ret, 1374 journal->j_devname); 1375 } 1376 } 1377 1378 /** 1379 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1380 * @journal: The journal to update. 
1381 * @tail_tid: TID of the new transaction at the tail of the log 1382 * @tail_block: The first block of the transaction at the tail of the log 1383 * @write_op: With which operation should we write the journal sb 1384 * 1385 * Update a journal's superblock information about log tail and write it to 1386 * disk, waiting for the IO to complete. 1387 */ 1388 void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, 1389 unsigned long tail_block, int write_op) 1390 { 1391 journal_superblock_t *sb = journal->j_superblock; 1392 1393 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1394 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 1395 tail_block, tail_tid); 1396 1397 sb->s_sequence = cpu_to_be32(tail_tid); 1398 sb->s_start = cpu_to_be32(tail_block); 1399 1400 jbd2_write_superblock(journal, write_op); 1401 1402 /* Log is no longer empty */ 1403 write_lock(&journal->j_state_lock); 1404 WARN_ON(!sb->s_sequence); 1405 journal->j_flags &= ~JBD2_FLUSHED; 1406 write_unlock(&journal->j_state_lock); 1407 } 1408 1409 /** 1410 * jbd2_mark_journal_empty() - Mark on disk journal as empty. 1411 * @journal: The journal to update. 1412 * 1413 * Update a journal's dynamic superblock fields to show that journal is empty. 1414 * Write updated superblock to disk waiting for IO to complete. 1415 */ 1416 static void jbd2_mark_journal_empty(journal_t *journal) 1417 { 1418 journal_superblock_t *sb = journal->j_superblock; 1419 1420 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1421 read_lock(&journal->j_state_lock); 1422 /* Is it already empty? 
*/ 1423 if (sb->s_start == 0) { 1424 read_unlock(&journal->j_state_lock); 1425 return; 1426 } 1427 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1428 journal->j_tail_sequence); 1429 1430 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1431 sb->s_start = cpu_to_be32(0); 1432 read_unlock(&journal->j_state_lock); 1433 1434 jbd2_write_superblock(journal, WRITE_FUA); 1435 1436 /* Log is no longer empty */ 1437 write_lock(&journal->j_state_lock); 1438 journal->j_flags |= JBD2_FLUSHED; 1439 write_unlock(&journal->j_state_lock); 1440 } 1441 1442 1443 /** 1444 * jbd2_journal_update_sb_errno() - Update error in the journal. 1445 * @journal: The journal to update. 1446 * 1447 * Update a journal's errno. Write updated superblock to disk waiting for IO 1448 * to complete. 1449 */ 1450 void jbd2_journal_update_sb_errno(journal_t *journal) 1451 { 1452 journal_superblock_t *sb = journal->j_superblock; 1453 1454 read_lock(&journal->j_state_lock); 1455 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", 1456 journal->j_errno); 1457 sb->s_errno = cpu_to_be32(journal->j_errno); 1458 read_unlock(&journal->j_state_lock); 1459 1460 jbd2_write_superblock(journal, WRITE_SYNC); 1461 } 1462 EXPORT_SYMBOL(jbd2_journal_update_sb_errno); 1463 1464 /* 1465 * Read the superblock for a given journal, performing initial 1466 * validation of the format. 
1467 */ 1468 static int journal_get_superblock(journal_t *journal) 1469 { 1470 struct buffer_head *bh; 1471 journal_superblock_t *sb; 1472 int err = -EIO; 1473 1474 bh = journal->j_sb_buffer; 1475 1476 J_ASSERT(bh != NULL); 1477 if (!buffer_uptodate(bh)) { 1478 ll_rw_block(READ, 1, &bh); 1479 wait_on_buffer(bh); 1480 if (!buffer_uptodate(bh)) { 1481 printk(KERN_ERR 1482 "JBD2: IO error reading journal superblock\n"); 1483 goto out; 1484 } 1485 } 1486 1487 if (buffer_verified(bh)) 1488 return 0; 1489 1490 sb = journal->j_superblock; 1491 1492 err = -EINVAL; 1493 1494 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1495 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1496 printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); 1497 goto out; 1498 } 1499 1500 switch(be32_to_cpu(sb->s_header.h_blocktype)) { 1501 case JBD2_SUPERBLOCK_V1: 1502 journal->j_format_version = 1; 1503 break; 1504 case JBD2_SUPERBLOCK_V2: 1505 journal->j_format_version = 2; 1506 break; 1507 default: 1508 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); 1509 goto out; 1510 } 1511 1512 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1513 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1514 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1515 printk(KERN_WARNING "JBD2: journal file too short\n"); 1516 goto out; 1517 } 1518 1519 if (be32_to_cpu(sb->s_first) == 0 || 1520 be32_to_cpu(sb->s_first) >= journal->j_maxlen) { 1521 printk(KERN_WARNING 1522 "JBD2: Invalid start block of journal: %u\n", 1523 be32_to_cpu(sb->s_first)); 1524 goto out; 1525 } 1526 1527 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1528 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1529 /* Can't have checksum v1 and v2 on at the same time! 
*/ 1530 printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " 1531 "at the same time!\n"); 1532 goto out; 1533 } 1534 1535 if (!jbd2_verify_csum_type(journal, sb)) { 1536 printk(KERN_ERR "JBD: Unknown checksum type\n"); 1537 goto out; 1538 } 1539 1540 /* Load the checksum driver */ 1541 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1542 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1543 if (IS_ERR(journal->j_chksum_driver)) { 1544 printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); 1545 err = PTR_ERR(journal->j_chksum_driver); 1546 journal->j_chksum_driver = NULL; 1547 goto out; 1548 } 1549 } 1550 1551 /* Check superblock checksum */ 1552 if (!jbd2_superblock_csum_verify(journal, sb)) { 1553 printk(KERN_ERR "JBD: journal checksum error\n"); 1554 goto out; 1555 } 1556 1557 /* Precompute checksum seed for all metadata */ 1558 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1559 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1560 sizeof(sb->s_uuid)); 1561 1562 set_buffer_verified(bh); 1563 1564 return 0; 1565 1566 out: 1567 journal_fail_superblock(journal); 1568 return err; 1569 } 1570 1571 /* 1572 * Load the on-disk journal superblock and read the key fields into the 1573 * journal_t. 1574 */ 1575 1576 static int load_superblock(journal_t *journal) 1577 { 1578 int err; 1579 journal_superblock_t *sb; 1580 1581 err = journal_get_superblock(journal); 1582 if (err) 1583 return err; 1584 1585 sb = journal->j_superblock; 1586 1587 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 1588 journal->j_tail = be32_to_cpu(sb->s_start); 1589 journal->j_first = be32_to_cpu(sb->s_first); 1590 journal->j_last = be32_to_cpu(sb->s_maxlen); 1591 journal->j_errno = be32_to_cpu(sb->s_errno); 1592 1593 return 0; 1594 } 1595 1596 1597 /** 1598 * int jbd2_journal_load() - Read journal from disk. 1599 * @journal: Journal to act on. 
1600 * 1601 * Given a journal_t structure which tells us which disk blocks contain 1602 * a journal, read the journal from disk to initialise the in-memory 1603 * structures. 1604 */ 1605 int jbd2_journal_load(journal_t *journal) 1606 { 1607 int err; 1608 journal_superblock_t *sb; 1609 1610 err = load_superblock(journal); 1611 if (err) 1612 return err; 1613 1614 sb = journal->j_superblock; 1615 /* If this is a V2 superblock, then we have to check the 1616 * features flags on it. */ 1617 1618 if (journal->j_format_version >= 2) { 1619 if ((sb->s_feature_ro_compat & 1620 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1621 (sb->s_feature_incompat & 1622 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1623 printk(KERN_WARNING 1624 "JBD2: Unrecognised features on journal\n"); 1625 return -EINVAL; 1626 } 1627 } 1628 1629 /* 1630 * Create a slab for this blocksize 1631 */ 1632 err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); 1633 if (err) 1634 return err; 1635 1636 /* Let the recovery code check whether it needs to recover any 1637 * data from the journal. */ 1638 if (jbd2_journal_recover(journal)) 1639 goto recovery_error; 1640 1641 if (journal->j_failed_commit) { 1642 printk(KERN_ERR "JBD2: journal transaction %u on %s " 1643 "is corrupt.\n", journal->j_failed_commit, 1644 journal->j_devname); 1645 return -EIO; 1646 } 1647 1648 /* OK, we've finished with the dynamic journal bits: 1649 * reinitialise the dynamic contents of the superblock in memory 1650 * and reset them on disk. */ 1651 if (journal_reset(journal)) 1652 goto recovery_error; 1653 1654 journal->j_flags &= ~JBD2_ABORT; 1655 journal->j_flags |= JBD2_LOADED; 1656 return 0; 1657 1658 recovery_error: 1659 printk(KERN_WARNING "JBD2: recovery failed\n"); 1660 return -EIO; 1661 } 1662 1663 /** 1664 * void jbd2_journal_destroy() - Release a journal_t structure. 1665 * @journal: Journal to act on. 1666 * 1667 * Release a journal_t structure once it is no longer in use by the 1668 * journaled object. 
1669 * Return <0 if we couldn't clean up the journal. 1670 */ 1671 int jbd2_journal_destroy(journal_t *journal) 1672 { 1673 int err = 0; 1674 1675 /* Wait for the commit thread to wake up and die. */ 1676 journal_kill_thread(journal); 1677 1678 /* Force a final log commit */ 1679 if (journal->j_running_transaction) 1680 jbd2_journal_commit_transaction(journal); 1681 1682 /* Force any old transactions to disk */ 1683 1684 /* Totally anal locking here... */ 1685 spin_lock(&journal->j_list_lock); 1686 while (journal->j_checkpoint_transactions != NULL) { 1687 spin_unlock(&journal->j_list_lock); 1688 mutex_lock(&journal->j_checkpoint_mutex); 1689 jbd2_log_do_checkpoint(journal); 1690 mutex_unlock(&journal->j_checkpoint_mutex); 1691 spin_lock(&journal->j_list_lock); 1692 } 1693 1694 J_ASSERT(journal->j_running_transaction == NULL); 1695 J_ASSERT(journal->j_committing_transaction == NULL); 1696 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1697 spin_unlock(&journal->j_list_lock); 1698 1699 if (journal->j_sb_buffer) { 1700 if (!is_journal_aborted(journal)) { 1701 mutex_lock(&journal->j_checkpoint_mutex); 1702 jbd2_mark_journal_empty(journal); 1703 mutex_unlock(&journal->j_checkpoint_mutex); 1704 } else 1705 err = -EIO; 1706 brelse(journal->j_sb_buffer); 1707 } 1708 1709 if (journal->j_proc_entry) 1710 jbd2_stats_proc_exit(journal); 1711 if (journal->j_inode) 1712 iput(journal->j_inode); 1713 if (journal->j_revoke) 1714 jbd2_journal_destroy_revoke(journal); 1715 if (journal->j_chksum_driver) 1716 crypto_free_shash(journal->j_chksum_driver); 1717 kfree(journal->j_wbuf); 1718 kfree(journal); 1719 1720 return err; 1721 } 1722 1723 1724 /** 1725 *int jbd2_journal_check_used_features () - Check if features specified are used. 1726 * @journal: Journal to check. 
1727 * @compat: bitmask of compatible features 1728 * @ro: bitmask of features that force read-only mount 1729 * @incompat: bitmask of incompatible features 1730 * 1731 * Check whether the journal uses all of a given set of 1732 * features. Return true (non-zero) if it does. 1733 **/ 1734 1735 int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, 1736 unsigned long ro, unsigned long incompat) 1737 { 1738 journal_superblock_t *sb; 1739 1740 if (!compat && !ro && !incompat) 1741 return 1; 1742 /* Load journal superblock if it is not loaded yet. */ 1743 if (journal->j_format_version == 0 && 1744 journal_get_superblock(journal) != 0) 1745 return 0; 1746 if (journal->j_format_version == 1) 1747 return 0; 1748 1749 sb = journal->j_superblock; 1750 1751 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && 1752 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && 1753 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) 1754 return 1; 1755 1756 return 0; 1757 } 1758 1759 /** 1760 * int jbd2_journal_check_available_features() - Check feature set in journalling layer 1761 * @journal: Journal to check. 1762 * @compat: bitmask of compatible features 1763 * @ro: bitmask of features that force read-only mount 1764 * @incompat: bitmask of incompatible features 1765 * 1766 * Check whether the journaling code supports the use of 1767 * all of a given set of features on this journal. Return true 1768 * (non-zero) if it can. */ 1769 1770 int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, 1771 unsigned long ro, unsigned long incompat) 1772 { 1773 if (!compat && !ro && !incompat) 1774 return 1; 1775 1776 /* We can support any known requested features iff the 1777 * superblock is in version 2. Otherwise we fail to support any 1778 * extended sb features. 
*/ 1779 1780 if (journal->j_format_version != 2) 1781 return 0; 1782 1783 if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && 1784 (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && 1785 (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) 1786 return 1; 1787 1788 return 0; 1789 } 1790 1791 /** 1792 * int jbd2_journal_set_features () - Mark a given journal feature in the superblock 1793 * @journal: Journal to act on. 1794 * @compat: bitmask of compatible features 1795 * @ro: bitmask of features that force read-only mount 1796 * @incompat: bitmask of incompatible features 1797 * 1798 * Mark a given journal feature as present on the 1799 * superblock. Returns true if the requested features could be set. 1800 * 1801 */ 1802 1803 int jbd2_journal_set_features (journal_t *journal, unsigned long compat, 1804 unsigned long ro, unsigned long incompat) 1805 { 1806 #define INCOMPAT_FEATURE_ON(f) \ 1807 ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) 1808 #define COMPAT_FEATURE_ON(f) \ 1809 ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) 1810 journal_superblock_t *sb; 1811 1812 if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) 1813 return 1; 1814 1815 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1816 return 0; 1817 1818 /* Asking for checksumming v2 and v1? Only give them v2. 
*/ 1819 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1820 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1821 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1822 1823 jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 1824 compat, ro, incompat); 1825 1826 sb = journal->j_superblock; 1827 1828 /* If enabling v2 checksums, update superblock */ 1829 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1830 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1831 sb->s_feature_compat &= 1832 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1833 1834 /* Load the checksum driver */ 1835 if (journal->j_chksum_driver == NULL) { 1836 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 1837 0, 0); 1838 if (IS_ERR(journal->j_chksum_driver)) { 1839 printk(KERN_ERR "JBD: Cannot load crc32c " 1840 "driver.\n"); 1841 journal->j_chksum_driver = NULL; 1842 return 0; 1843 } 1844 } 1845 1846 /* Precompute checksum seed for all metadata */ 1847 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1848 JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1849 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1850 sb->s_uuid, 1851 sizeof(sb->s_uuid)); 1852 } 1853 1854 /* If enabling v1 checksums, downgrade superblock */ 1855 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1856 sb->s_feature_incompat &= 1857 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1858 1859 sb->s_feature_compat |= cpu_to_be32(compat); 1860 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1861 sb->s_feature_incompat |= cpu_to_be32(incompat); 1862 1863 return 1; 1864 #undef COMPAT_FEATURE_ON 1865 #undef INCOMPAT_FEATURE_ON 1866 } 1867 1868 /* 1869 * jbd2_journal_clear_features () - Clear a given journal feature in the 1870 * superblock 1871 * @journal: Journal to act on. 1872 * @compat: bitmask of compatible features 1873 * @ro: bitmask of features that force read-only mount 1874 * @incompat: bitmask of incompatible features 1875 * 1876 * Clear a given journal feature as present on the 1877 * superblock. 
1878 */ 1879 void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, 1880 unsigned long ro, unsigned long incompat) 1881 { 1882 journal_superblock_t *sb; 1883 1884 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", 1885 compat, ro, incompat); 1886 1887 sb = journal->j_superblock; 1888 1889 sb->s_feature_compat &= ~cpu_to_be32(compat); 1890 sb->s_feature_ro_compat &= ~cpu_to_be32(ro); 1891 sb->s_feature_incompat &= ~cpu_to_be32(incompat); 1892 } 1893 EXPORT_SYMBOL(jbd2_journal_clear_features); 1894 1895 /** 1896 * int jbd2_journal_flush () - Flush journal 1897 * @journal: Journal to act on. 1898 * 1899 * Flush all data for a given journal to disk and empty the journal. 1900 * Filesystems can use this when remounting readonly to ensure that 1901 * recovery does not need to happen on remount. 1902 */ 1903 1904 int jbd2_journal_flush(journal_t *journal) 1905 { 1906 int err = 0; 1907 transaction_t *transaction = NULL; 1908 1909 write_lock(&journal->j_state_lock); 1910 1911 /* Force everything buffered to the log... */ 1912 if (journal->j_running_transaction) { 1913 transaction = journal->j_running_transaction; 1914 __jbd2_log_start_commit(journal, transaction->t_tid); 1915 } else if (journal->j_committing_transaction) 1916 transaction = journal->j_committing_transaction; 1917 1918 /* Wait for the log commit to complete... */ 1919 if (transaction) { 1920 tid_t tid = transaction->t_tid; 1921 1922 write_unlock(&journal->j_state_lock); 1923 jbd2_log_wait_commit(journal, tid); 1924 } else { 1925 write_unlock(&journal->j_state_lock); 1926 } 1927 1928 /* ...and flush everything in the log out to disk. 
*/ 1929 spin_lock(&journal->j_list_lock); 1930 while (!err && journal->j_checkpoint_transactions != NULL) { 1931 spin_unlock(&journal->j_list_lock); 1932 mutex_lock(&journal->j_checkpoint_mutex); 1933 err = jbd2_log_do_checkpoint(journal); 1934 mutex_unlock(&journal->j_checkpoint_mutex); 1935 spin_lock(&journal->j_list_lock); 1936 } 1937 spin_unlock(&journal->j_list_lock); 1938 1939 if (is_journal_aborted(journal)) 1940 return -EIO; 1941 1942 mutex_lock(&journal->j_checkpoint_mutex); 1943 jbd2_cleanup_journal_tail(journal); 1944 1945 /* Finally, mark the journal as really needing no recovery. 1946 * This sets s_start==0 in the underlying superblock, which is 1947 * the magic code for a fully-recovered superblock. Any future 1948 * commits of data to the journal will restore the current 1949 * s_start value. */ 1950 jbd2_mark_journal_empty(journal); 1951 mutex_unlock(&journal->j_checkpoint_mutex); 1952 write_lock(&journal->j_state_lock); 1953 J_ASSERT(!journal->j_running_transaction); 1954 J_ASSERT(!journal->j_committing_transaction); 1955 J_ASSERT(!journal->j_checkpoint_transactions); 1956 J_ASSERT(journal->j_head == journal->j_tail); 1957 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1958 write_unlock(&journal->j_state_lock); 1959 return 0; 1960 } 1961 1962 /** 1963 * int jbd2_journal_wipe() - Wipe journal contents 1964 * @journal: Journal to act on. 1965 * @write: flag (see below) 1966 * 1967 * Wipe out all of the contents of a journal, safely. This will produce 1968 * a warning if the journal contains any valid recovery information. 1969 * Must be called between journal_init_*() and jbd2_journal_load(). 1970 * 1971 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise 1972 * we merely suppress recovery. 
1973 */ 1974 1975 int jbd2_journal_wipe(journal_t *journal, int write) 1976 { 1977 int err = 0; 1978 1979 J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 1980 1981 err = load_superblock(journal); 1982 if (err) 1983 return err; 1984 1985 if (!journal->j_tail) 1986 goto no_recovery; 1987 1988 printk(KERN_WARNING "JBD2: %s recovery information on journal\n", 1989 write ? "Clearing" : "Ignoring"); 1990 1991 err = jbd2_journal_skip_recovery(journal); 1992 if (write) { 1993 /* Lock to make assertions happy... */ 1994 mutex_lock(&journal->j_checkpoint_mutex); 1995 jbd2_mark_journal_empty(journal); 1996 mutex_unlock(&journal->j_checkpoint_mutex); 1997 } 1998 1999 no_recovery: 2000 return err; 2001 } 2002 2003 /* 2004 * Journal abort has very specific semantics, which we describe 2005 * for journal abort. 2006 * 2007 * Two internal functions, which provide abort to the jbd layer 2008 * itself are here. 2009 */ 2010 2011 /* 2012 * Quick version for internal journal use (doesn't lock the journal). 2013 * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, 2014 * and don't attempt to make any other journal updates. 2015 */ 2016 void __jbd2_journal_abort_hard(journal_t *journal) 2017 { 2018 transaction_t *transaction; 2019 2020 if (journal->j_flags & JBD2_ABORT) 2021 return; 2022 2023 printk(KERN_ERR "Aborting journal on device %s.\n", 2024 journal->j_devname); 2025 2026 write_lock(&journal->j_state_lock); 2027 journal->j_flags |= JBD2_ABORT; 2028 transaction = journal->j_running_transaction; 2029 if (transaction) 2030 __jbd2_log_start_commit(journal, transaction->t_tid); 2031 write_unlock(&journal->j_state_lock); 2032 } 2033 2034 /* Soft abort: record the abort error status in the journal superblock, 2035 * but don't do any other IO. 
*/
static void __journal_abort_soft (journal_t *journal, int errno)
{
	/* Already aborted: nothing to do. */
	if (journal->j_flags & JBD2_ABORT)
		return;

	/* Keep the first error recorded; later ones do not overwrite it. */
	if (journal->j_errno == 0)
		journal->j_errno = errno;

	__jbd2_journal_abort_hard(journal);

	/* With no errno supplied, the superblock is left alone. */
	if (errno == 0)
		return;

	jbd2_journal_update_sb_errno(journal);
}

/**
 * void jbd2_journal_abort () - Shutdown the journal immediately.
 * @journal: the journal to shutdown.
 * @errno:   an error number to record in the journal indicating
 *           the reason for the shutdown.
 *
 * Perform a complete, immediate shutdown of the ENTIRE
 * journal (not of a single transaction).  This operation cannot be
 * undone without closing and reopening the journal.
 *
 * The jbd2_journal_abort function is intended to support higher level error
 * recovery mechanisms such as the ext2/ext3 remount-readonly error
 * mode.
 *
 * Journal abort has very specific semantics.  Any existing dirty,
 * unjournaled buffers in the main filesystem will still be written to
 * disk by bdflush, but the journaling mechanism will be suspended
 * immediately and no further transaction commits will be honoured.
 *
 * Any dirty, journaled buffers will be written back to disk without
 * hitting the journal.  Atomicity cannot be guaranteed on an aborted
 * filesystem, but we _do_ attempt to leave as much data as possible
 * behind for fsck to use for cleanup.
 *
 * Any attempt to get a new transaction handle on a journal which is in
 * ABORT state will just result in an -EROFS error return.  A
 * jbd2_journal_stop on an existing handle will return -EIO if we have
 * entered abort state during the update.
 *
 * Recursive transactions are not disturbed by journal abort until the
 * final jbd2_journal_stop, which will receive the -EIO error.
2081 * 2082 * Finally, the jbd2_journal_abort call allows the caller to supply an errno 2083 * which will be recorded (if possible) in the journal superblock. This 2084 * allows a client to record failure conditions in the middle of a 2085 * transaction without having to complete the transaction to record the 2086 * failure to disk. ext3_error, for example, now uses this 2087 * functionality. 2088 * 2089 * Errors which originate from within the journaling layer will NOT 2090 * supply an errno; a null errno implies that absolutely no further 2091 * writes are done to the journal (unless there are any already in 2092 * progress). 2093 * 2094 */ 2095 2096 void jbd2_journal_abort(journal_t *journal, int errno) 2097 { 2098 __journal_abort_soft(journal, errno); 2099 } 2100 2101 /** 2102 * int jbd2_journal_errno () - returns the journal's error state. 2103 * @journal: journal to examine. 2104 * 2105 * This is the errno number set with jbd2_journal_abort(), the last 2106 * time the journal was mounted - if the journal was stopped 2107 * without calling abort this will be 0. 2108 * 2109 * If the journal has been aborted on this mount time -EROFS will 2110 * be returned. 2111 */ 2112 int jbd2_journal_errno(journal_t *journal) 2113 { 2114 int err; 2115 2116 read_lock(&journal->j_state_lock); 2117 if (journal->j_flags & JBD2_ABORT) 2118 err = -EROFS; 2119 else 2120 err = journal->j_errno; 2121 read_unlock(&journal->j_state_lock); 2122 return err; 2123 } 2124 2125 /** 2126 * int jbd2_journal_clear_err () - clears the journal's error state 2127 * @journal: journal to act on. 2128 * 2129 * An error must be cleared or acked to take a FS out of readonly 2130 * mode. 
2131 */ 2132 int jbd2_journal_clear_err(journal_t *journal) 2133 { 2134 int err = 0; 2135 2136 write_lock(&journal->j_state_lock); 2137 if (journal->j_flags & JBD2_ABORT) 2138 err = -EROFS; 2139 else 2140 journal->j_errno = 0; 2141 write_unlock(&journal->j_state_lock); 2142 return err; 2143 } 2144 2145 /** 2146 * void jbd2_journal_ack_err() - Ack journal err. 2147 * @journal: journal to act on. 2148 * 2149 * An error must be cleared or acked to take a FS out of readonly 2150 * mode. 2151 */ 2152 void jbd2_journal_ack_err(journal_t *journal) 2153 { 2154 write_lock(&journal->j_state_lock); 2155 if (journal->j_errno) 2156 journal->j_flags |= JBD2_ACK_ERR; 2157 write_unlock(&journal->j_state_lock); 2158 } 2159 2160 int jbd2_journal_blocks_per_page(struct inode *inode) 2161 { 2162 return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 2163 } 2164 2165 /* 2166 * helper functions to deal with 32 or 64bit block numbers. 2167 */ 2168 size_t journal_tag_bytes(journal_t *journal) 2169 { 2170 journal_block_tag_t tag; 2171 size_t x = 0; 2172 2173 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 2174 x += sizeof(tag.t_checksum); 2175 2176 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2177 return x + JBD2_TAG_SIZE64; 2178 else 2179 return x + JBD2_TAG_SIZE32; 2180 } 2181 2182 /* 2183 * JBD memory management 2184 * 2185 * These functions are used to allocate block-sized chunks of memory 2186 * used for making copies of buffer_head data. Very often it will be 2187 * page-sized chunks of data, but sometimes it will be in 2188 * sub-page-size chunks. (For example, 16k pages on Power systems 2189 * with a 4k block file system.) For blocks smaller than a page, we 2190 * use a SLAB allocator. There are slab caches for each block size, 2191 * which are allocated at mount time, if necessary, and we only free 2192 * (all of) the slab caches when/if the jbd2 module is unloaded. 
For 2193 * this reason we don't need to a mutex to protect access to 2194 * jbd2_slab[] allocating or releasing memory; only in 2195 * jbd2_journal_create_slab(). 2196 */ 2197 #define JBD2_MAX_SLABS 8 2198 static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; 2199 2200 static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { 2201 "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", 2202 "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" 2203 }; 2204 2205 2206 static void jbd2_journal_destroy_slabs(void) 2207 { 2208 int i; 2209 2210 for (i = 0; i < JBD2_MAX_SLABS; i++) { 2211 if (jbd2_slab[i]) 2212 kmem_cache_destroy(jbd2_slab[i]); 2213 jbd2_slab[i] = NULL; 2214 } 2215 } 2216 2217 static int jbd2_journal_create_slab(size_t size) 2218 { 2219 static DEFINE_MUTEX(jbd2_slab_create_mutex); 2220 int i = order_base_2(size) - 10; 2221 size_t slab_size; 2222 2223 if (size == PAGE_SIZE) 2224 return 0; 2225 2226 if (i >= JBD2_MAX_SLABS) 2227 return -EINVAL; 2228 2229 if (unlikely(i < 0)) 2230 i = 0; 2231 mutex_lock(&jbd2_slab_create_mutex); 2232 if (jbd2_slab[i]) { 2233 mutex_unlock(&jbd2_slab_create_mutex); 2234 return 0; /* Already created */ 2235 } 2236 2237 slab_size = 1 << (i+10); 2238 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, 2239 slab_size, 0, NULL); 2240 mutex_unlock(&jbd2_slab_create_mutex); 2241 if (!jbd2_slab[i]) { 2242 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); 2243 return -ENOMEM; 2244 } 2245 return 0; 2246 } 2247 2248 static struct kmem_cache *get_slab(size_t size) 2249 { 2250 int i = order_base_2(size) - 10; 2251 2252 BUG_ON(i >= JBD2_MAX_SLABS); 2253 if (unlikely(i < 0)) 2254 i = 0; 2255 BUG_ON(jbd2_slab[i] == NULL); 2256 return jbd2_slab[i]; 2257 } 2258 2259 void *jbd2_alloc(size_t size, gfp_t flags) 2260 { 2261 void *ptr; 2262 2263 BUG_ON(size & (size-1)); /* Must be a power of 2 */ 2264 2265 flags |= __GFP_REPEAT; 2266 if (size == PAGE_SIZE) 2267 ptr = (void *)__get_free_pages(flags, 0); 2268 else if (size > PAGE_SIZE) { 2269 int 
order = get_order(size); 2270 2271 if (order < 3) 2272 ptr = (void *)__get_free_pages(flags, order); 2273 else 2274 ptr = vmalloc(size); 2275 } else 2276 ptr = kmem_cache_alloc(get_slab(size), flags); 2277 2278 /* Check alignment; SLUB has gotten this wrong in the past, 2279 * and this can lead to user data corruption! */ 2280 BUG_ON(((unsigned long) ptr) & (size-1)); 2281 2282 return ptr; 2283 } 2284 2285 void jbd2_free(void *ptr, size_t size) 2286 { 2287 if (size == PAGE_SIZE) { 2288 free_pages((unsigned long)ptr, 0); 2289 return; 2290 } 2291 if (size > PAGE_SIZE) { 2292 int order = get_order(size); 2293 2294 if (order < 3) 2295 free_pages((unsigned long)ptr, order); 2296 else 2297 vfree(ptr); 2298 return; 2299 } 2300 kmem_cache_free(get_slab(size), ptr); 2301 }; 2302 2303 /* 2304 * Journal_head storage management 2305 */ 2306 static struct kmem_cache *jbd2_journal_head_cache; 2307 #ifdef CONFIG_JBD2_DEBUG 2308 static atomic_t nr_journal_heads = ATOMIC_INIT(0); 2309 #endif 2310 2311 static int jbd2_journal_init_journal_head_cache(void) 2312 { 2313 int retval; 2314 2315 J_ASSERT(jbd2_journal_head_cache == NULL); 2316 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 2317 sizeof(struct journal_head), 2318 0, /* offset */ 2319 SLAB_TEMPORARY, /* flags */ 2320 NULL); /* ctor */ 2321 retval = 0; 2322 if (!jbd2_journal_head_cache) { 2323 retval = -ENOMEM; 2324 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); 2325 } 2326 return retval; 2327 } 2328 2329 static void jbd2_journal_destroy_journal_head_cache(void) 2330 { 2331 if (jbd2_journal_head_cache) { 2332 kmem_cache_destroy(jbd2_journal_head_cache); 2333 jbd2_journal_head_cache = NULL; 2334 } 2335 } 2336 2337 /* 2338 * journal_head splicing and dicing 2339 */ 2340 static struct journal_head *journal_alloc_journal_head(void) 2341 { 2342 struct journal_head *ret; 2343 2344 #ifdef CONFIG_JBD2_DEBUG 2345 atomic_inc(&nr_journal_heads); 2346 #endif 2347 ret = 
kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); 2348 if (!ret) { 2349 jbd_debug(1, "out of memory for journal_head\n"); 2350 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); 2351 while (!ret) { 2352 yield(); 2353 ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); 2354 } 2355 } 2356 return ret; 2357 } 2358 2359 static void journal_free_journal_head(struct journal_head *jh) 2360 { 2361 #ifdef CONFIG_JBD2_DEBUG 2362 atomic_dec(&nr_journal_heads); 2363 memset(jh, JBD2_POISON_FREE, sizeof(*jh)); 2364 #endif 2365 kmem_cache_free(jbd2_journal_head_cache, jh); 2366 } 2367 2368 /* 2369 * A journal_head is attached to a buffer_head whenever JBD has an 2370 * interest in the buffer. 2371 * 2372 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit 2373 * is set. This bit is tested in core kernel code where we need to take 2374 * JBD-specific actions. Testing the zeroness of ->b_private is not reliable 2375 * there. 2376 * 2377 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. 2378 * 2379 * When a buffer has its BH_JBD bit set it is immune from being released by 2380 * core kernel code, mainly via ->b_count. 2381 * 2382 * A journal_head is detached from its buffer_head when the journal_head's 2383 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint 2384 * transaction (b_cp_transaction) hold their references to b_jcount. 2385 * 2386 * Various places in the kernel want to attach a journal_head to a buffer_head 2387 * _before_ attaching the journal_head to a transaction. To protect the 2388 * journal_head in this situation, jbd2_journal_add_journal_head elevates the 2389 * journal_head's b_jcount refcount by one. The caller must call 2390 * jbd2_journal_put_journal_head() to undo this. 2391 * 2392 * So the typical usage would be: 2393 * 2394 * (Attach a journal_head if needed. Increments b_jcount) 2395 * struct journal_head *jh = jbd2_journal_add_journal_head(bh); 2396 * ... 
2397 * (Get another reference for transaction) 2398 * jbd2_journal_grab_journal_head(bh); 2399 * jh->b_transaction = xxx; 2400 * (Put original reference) 2401 * jbd2_journal_put_journal_head(jh); 2402 */ 2403 2404 /* 2405 * Give a buffer_head a journal_head. 2406 * 2407 * May sleep. 2408 */ 2409 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) 2410 { 2411 struct journal_head *jh; 2412 struct journal_head *new_jh = NULL; 2413 2414 repeat: 2415 if (!buffer_jbd(bh)) 2416 new_jh = journal_alloc_journal_head(); 2417 2418 jbd_lock_bh_journal_head(bh); 2419 if (buffer_jbd(bh)) { 2420 jh = bh2jh(bh); 2421 } else { 2422 J_ASSERT_BH(bh, 2423 (atomic_read(&bh->b_count) > 0) || 2424 (bh->b_page && bh->b_page->mapping)); 2425 2426 if (!new_jh) { 2427 jbd_unlock_bh_journal_head(bh); 2428 goto repeat; 2429 } 2430 2431 jh = new_jh; 2432 new_jh = NULL; /* We consumed it */ 2433 set_buffer_jbd(bh); 2434 bh->b_private = jh; 2435 jh->b_bh = bh; 2436 get_bh(bh); 2437 BUFFER_TRACE(bh, "added journal_head"); 2438 } 2439 jh->b_jcount++; 2440 jbd_unlock_bh_journal_head(bh); 2441 if (new_jh) 2442 journal_free_journal_head(new_jh); 2443 return bh->b_private; 2444 } 2445 2446 /* 2447 * Grab a ref against this buffer_head's journal_head. 
If it ended up not 2448 * having a journal_head, return NULL 2449 */ 2450 struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) 2451 { 2452 struct journal_head *jh = NULL; 2453 2454 jbd_lock_bh_journal_head(bh); 2455 if (buffer_jbd(bh)) { 2456 jh = bh2jh(bh); 2457 jh->b_jcount++; 2458 } 2459 jbd_unlock_bh_journal_head(bh); 2460 return jh; 2461 } 2462 2463 static void __journal_remove_journal_head(struct buffer_head *bh) 2464 { 2465 struct journal_head *jh = bh2jh(bh); 2466 2467 J_ASSERT_JH(jh, jh->b_jcount >= 0); 2468 J_ASSERT_JH(jh, jh->b_transaction == NULL); 2469 J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 2470 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 2471 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 2472 J_ASSERT_BH(bh, buffer_jbd(bh)); 2473 J_ASSERT_BH(bh, jh2bh(jh) == bh); 2474 BUFFER_TRACE(bh, "remove journal_head"); 2475 if (jh->b_frozen_data) { 2476 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); 2477 jbd2_free(jh->b_frozen_data, bh->b_size); 2478 } 2479 if (jh->b_committed_data) { 2480 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); 2481 jbd2_free(jh->b_committed_data, bh->b_size); 2482 } 2483 bh->b_private = NULL; 2484 jh->b_bh = NULL; /* debug, really */ 2485 clear_buffer_jbd(bh); 2486 journal_free_journal_head(jh); 2487 } 2488 2489 /* 2490 * Drop a reference on the passed journal_head. If it fell to zero then 2491 * release the journal_head from the buffer_head. 
2492 */ 2493 void jbd2_journal_put_journal_head(struct journal_head *jh) 2494 { 2495 struct buffer_head *bh = jh2bh(jh); 2496 2497 jbd_lock_bh_journal_head(bh); 2498 J_ASSERT_JH(jh, jh->b_jcount > 0); 2499 --jh->b_jcount; 2500 if (!jh->b_jcount) { 2501 __journal_remove_journal_head(bh); 2502 jbd_unlock_bh_journal_head(bh); 2503 __brelse(bh); 2504 } else 2505 jbd_unlock_bh_journal_head(bh); 2506 } 2507 2508 /* 2509 * Initialize jbd inode head 2510 */ 2511 void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) 2512 { 2513 jinode->i_transaction = NULL; 2514 jinode->i_next_transaction = NULL; 2515 jinode->i_vfs_inode = inode; 2516 jinode->i_flags = 0; 2517 INIT_LIST_HEAD(&jinode->i_list); 2518 } 2519 2520 /* 2521 * Function to be called before we start removing inode from memory (i.e., 2522 * clear_inode() is a fine place to be called from). It removes inode from 2523 * transaction's lists. 2524 */ 2525 void jbd2_journal_release_jbd_inode(journal_t *journal, 2526 struct jbd2_inode *jinode) 2527 { 2528 if (!journal) 2529 return; 2530 restart: 2531 spin_lock(&journal->j_list_lock); 2532 /* Is commit writing out inode - we have to wait */ 2533 if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { 2534 wait_queue_head_t *wq; 2535 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2536 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2537 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2538 spin_unlock(&journal->j_list_lock); 2539 schedule(); 2540 finish_wait(wq, &wait.wait); 2541 goto restart; 2542 } 2543 2544 if (jinode->i_transaction) { 2545 list_del(&jinode->i_list); 2546 jinode->i_transaction = NULL; 2547 } 2548 spin_unlock(&journal->j_list_lock); 2549 } 2550 2551 2552 #ifdef CONFIG_PROC_FS 2553 2554 #define JBD2_STATS_PROC_NAME "fs/jbd2" 2555 2556 static void __init jbd2_create_jbd_stats_proc_entry(void) 2557 { 2558 proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL); 2559 } 2560 2561 static void __exit 
jbd2_remove_jbd_stats_proc_entry(void) 2562 { 2563 if (proc_jbd2_stats) 2564 remove_proc_entry(JBD2_STATS_PROC_NAME, NULL); 2565 } 2566 2567 #else 2568 2569 #define jbd2_create_jbd_stats_proc_entry() do {} while (0) 2570 #define jbd2_remove_jbd_stats_proc_entry() do {} while (0) 2571 2572 #endif 2573 2574 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; 2575 2576 static int __init jbd2_journal_init_handle_cache(void) 2577 { 2578 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); 2579 if (jbd2_handle_cache == NULL) { 2580 printk(KERN_EMERG "JBD2: failed to create handle cache\n"); 2581 return -ENOMEM; 2582 } 2583 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); 2584 if (jbd2_inode_cache == NULL) { 2585 printk(KERN_EMERG "JBD2: failed to create inode cache\n"); 2586 kmem_cache_destroy(jbd2_handle_cache); 2587 return -ENOMEM; 2588 } 2589 return 0; 2590 } 2591 2592 static void jbd2_journal_destroy_handle_cache(void) 2593 { 2594 if (jbd2_handle_cache) 2595 kmem_cache_destroy(jbd2_handle_cache); 2596 if (jbd2_inode_cache) 2597 kmem_cache_destroy(jbd2_inode_cache); 2598 2599 } 2600 2601 /* 2602 * Module startup and shutdown 2603 */ 2604 2605 static int __init journal_init_caches(void) 2606 { 2607 int ret; 2608 2609 ret = jbd2_journal_init_revoke_caches(); 2610 if (ret == 0) 2611 ret = jbd2_journal_init_journal_head_cache(); 2612 if (ret == 0) 2613 ret = jbd2_journal_init_handle_cache(); 2614 if (ret == 0) 2615 ret = jbd2_journal_init_transaction_cache(); 2616 return ret; 2617 } 2618 2619 static void jbd2_journal_destroy_caches(void) 2620 { 2621 jbd2_journal_destroy_revoke_caches(); 2622 jbd2_journal_destroy_journal_head_cache(); 2623 jbd2_journal_destroy_handle_cache(); 2624 jbd2_journal_destroy_transaction_cache(); 2625 jbd2_journal_destroy_slabs(); 2626 } 2627 2628 static int __init journal_init(void) 2629 { 2630 int ret; 2631 2632 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2633 2634 ret = journal_init_caches(); 2635 if (ret == 0) 
{ 2636 jbd2_create_jbd_stats_proc_entry(); 2637 } else { 2638 jbd2_journal_destroy_caches(); 2639 } 2640 return ret; 2641 } 2642 2643 static void __exit journal_exit(void) 2644 { 2645 #ifdef CONFIG_JBD2_DEBUG 2646 int n = atomic_read(&nr_journal_heads); 2647 if (n) 2648 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); 2649 #endif 2650 jbd2_remove_jbd_stats_proc_entry(); 2651 jbd2_journal_destroy_caches(); 2652 } 2653 2654 MODULE_LICENSE("GPL"); 2655 module_init(journal_init); 2656 module_exit(journal_exit); 2657 2658