1 /* 2 * linux/fs/jbd2/journal.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5 * 6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Generic filesystem journal-writing code; part of the ext2fs 13 * journaling system. 14 * 15 * This file manages journals: areas of disk reserved for logging 16 * transactional updates. This includes the kernel journaling thread 17 * which is responsible for scheduling updates to the log. 18 * 19 * We do not actually manage the physical storage of the journal in this 20 * file: that is left to a per-journal policy function, which allows us 21 * to store the journal within a filesystem-specified area for ext2 22 * journaling (ext2 can use a reserved inode for storing the log). 23 */ 24 25 #include <linux/module.h> 26 #include <linux/time.h> 27 #include <linux/fs.h> 28 #include <linux/jbd2.h> 29 #include <linux/errno.h> 30 #include <linux/slab.h> 31 #include <linux/init.h> 32 #include <linux/mm.h> 33 #include <linux/freezer.h> 34 #include <linux/pagemap.h> 35 #include <linux/kthread.h> 36 #include <linux/poison.h> 37 #include <linux/proc_fs.h> 38 #include <linux/seq_file.h> 39 #include <linux/math64.h> 40 #include <linux/hash.h> 41 #include <linux/log2.h> 42 #include <linux/vmalloc.h> 43 #include <linux/backing-dev.h> 44 #include <linux/bitops.h> 45 #include <linux/ratelimit.h> 46 #include <linux/sched/mm.h> 47 48 #define CREATE_TRACE_POINTS 49 #include <trace/events/jbd2.h> 50 51 #include <linux/uaccess.h> 52 #include <asm/page.h> 53 54 #ifdef CONFIG_JBD2_DEBUG 55 ushort jbd2_journal_enable_debug __read_mostly; 56 EXPORT_SYMBOL(jbd2_journal_enable_debug); 57 58 module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644); 59 MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2"); 60 
#endif

/* Transaction/handle API exported to client filesystems. */
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
EXPORT_SYMBOL(jbd2_journal_lock_updates);
EXPORT_SYMBOL(jbd2_journal_unlock_updates);
EXPORT_SYMBOL(jbd2_journal_get_write_access);
EXPORT_SYMBOL(jbd2_journal_get_create_access);
EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
#if 0
EXPORT_SYMBOL(journal_sync_buffer);
#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);

/* Journal-level API exported to client filesystems. */
EXPORT_SYMBOL(jbd2_journal_init_dev);
EXPORT_SYMBOL(jbd2_journal_init_inode);
EXPORT_SYMBOL(jbd2_journal_check_used_features);
EXPORT_SYMBOL(jbd2_journal_check_available_features);
EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy);
EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
EXPORT_SYMBOL(jbd2_journal_clear_err);
EXPORT_SYMBOL(jbd2_log_wait_commit);
EXPORT_SYMBOL(jbd2_log_start_commit);
EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_add_write);
EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache);

/* Forward declarations of helpers defined later in this file. */
static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size);

#ifdef CONFIG_JBD2_DEBUG
/*
 * Emit one debug message at KERN_DEBUG, prefixed with the caller's file,
 * function and line.  Messages whose @level is above the
 * jbd2_journal_enable_debug threshold are dropped.
 */
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list ap;

	if (level > jbd2_journal_enable_debug)
		return;

	va_start(ap, fmt);
	vaf.fmt = fmt;
	vaf.va = &ap;
	printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
	va_end(ap);
}
EXPORT_SYMBOL(__jbd2_debug);
#endif

/* Checksumming functions */

/*
 * Returns 1 when the journal has no v2/v3 checksum feature, otherwise
 * whether the superblock declares the CRC32C checksum type.
 */
static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3_feature(j))
		return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;

	return 1;
}

/*
 * Compute the superblock checksum with the s_checksum field treated as
 * zero.  The field is restored before returning; the result is big-endian.
 */
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
	__be32 saved = sb->s_checksum;
	__u32 crc;

	sb->s_checksum = 0;
	crc = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
	sb->s_checksum = saved;

	return cpu_to_be32(crc);
}

/*
 * Returns 1 if checksums are not in use, otherwise whether the stored
 * superblock checksum matches a freshly computed one.
 */
static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3(j))
		return sb->s_checksum == jbd2_superblock_csum(j, sb);

	return 1;
}

/* Store a freshly computed checksum in the superblock, if checksums are on. */
static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3(j))
		sb->s_checksum = jbd2_superblock_csum(j, sb);
}

/*
 * Commit timer callback: wake the task recorded in the journal's j_task.
 */

static void commit_timeout(struct timer_list *t)
{
	journal_t *journal = from_timer(journal, t, j_commit_timer);

	wake_up_process(journal->j_task);
}

/*
 * kjournald2: The main thread function used to manage a logging device
 * journal.
 *
 * This kernel thread is responsible for two things:
 *
 * 1) COMMIT: Every so often we need to commit the current state of the
 *    filesystem to disk.  The journal thread is responsible for writing
 *    all of the metadata buffers to disk.
184 * 185 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all 186 * of the data in that part of the log has been rewritten elsewhere on 187 * the disk. Flushing these old buffers to reclaim space in the log is 188 * known as checkpointing, and this thread is responsible for that job. 189 */ 190 191 static int kjournald2(void *arg) 192 { 193 journal_t *journal = arg; 194 transaction_t *transaction; 195 196 /* 197 * Set up an interval timer which can be used to trigger a commit wakeup 198 * after the commit interval expires 199 */ 200 timer_setup(&journal->j_commit_timer, commit_timeout, 0); 201 202 set_freezable(); 203 204 /* Record that the journal thread is running */ 205 journal->j_task = current; 206 wake_up(&journal->j_wait_done_commit); 207 208 /* 209 * Make sure that no allocations from this kernel thread will ever 210 * recurse to the fs layer because we are responsible for the 211 * transaction commit and any fs involvement might get stuck waiting for 212 * the trasn. commit. 213 */ 214 memalloc_nofs_save(); 215 216 /* 217 * And now, wait forever for commit wakeup events. 218 */ 219 write_lock(&journal->j_state_lock); 220 221 loop: 222 if (journal->j_flags & JBD2_UNMOUNT) 223 goto end_loop; 224 225 jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", 226 journal->j_commit_sequence, journal->j_commit_request); 227 228 if (journal->j_commit_sequence != journal->j_commit_request) { 229 jbd_debug(1, "OK, requests differ\n"); 230 write_unlock(&journal->j_state_lock); 231 del_timer_sync(&journal->j_commit_timer); 232 jbd2_journal_commit_transaction(journal); 233 write_lock(&journal->j_state_lock); 234 goto loop; 235 } 236 237 wake_up(&journal->j_wait_done_commit); 238 if (freezing(current)) { 239 /* 240 * The simpler the better. Flushing journal isn't a 241 * good idea, because that depends on threads that may 242 * be already stopped. 
243 */ 244 jbd_debug(1, "Now suspending kjournald2\n"); 245 write_unlock(&journal->j_state_lock); 246 try_to_freeze(); 247 write_lock(&journal->j_state_lock); 248 } else { 249 /* 250 * We assume on resume that commits are already there, 251 * so we don't sleep 252 */ 253 DEFINE_WAIT(wait); 254 int should_sleep = 1; 255 256 prepare_to_wait(&journal->j_wait_commit, &wait, 257 TASK_INTERRUPTIBLE); 258 if (journal->j_commit_sequence != journal->j_commit_request) 259 should_sleep = 0; 260 transaction = journal->j_running_transaction; 261 if (transaction && time_after_eq(jiffies, 262 transaction->t_expires)) 263 should_sleep = 0; 264 if (journal->j_flags & JBD2_UNMOUNT) 265 should_sleep = 0; 266 if (should_sleep) { 267 write_unlock(&journal->j_state_lock); 268 schedule(); 269 write_lock(&journal->j_state_lock); 270 } 271 finish_wait(&journal->j_wait_commit, &wait); 272 } 273 274 jbd_debug(1, "kjournald2 wakes\n"); 275 276 /* 277 * Were we woken up by a commit wakeup event? 278 */ 279 transaction = journal->j_running_transaction; 280 if (transaction && time_after_eq(jiffies, transaction->t_expires)) { 281 journal->j_commit_request = transaction->t_tid; 282 jbd_debug(1, "woke because of timeout\n"); 283 } 284 goto loop; 285 286 end_loop: 287 del_timer_sync(&journal->j_commit_timer); 288 journal->j_task = NULL; 289 wake_up(&journal->j_wait_done_commit); 290 jbd_debug(1, "Journal thread exiting.\n"); 291 write_unlock(&journal->j_state_lock); 292 return 0; 293 } 294 295 static int jbd2_journal_start_thread(journal_t *journal) 296 { 297 struct task_struct *t; 298 299 t = kthread_run(kjournald2, journal, "jbd2/%s", 300 journal->j_devname); 301 if (IS_ERR(t)) 302 return PTR_ERR(t); 303 304 wait_event(journal->j_wait_done_commit, journal->j_task != NULL); 305 return 0; 306 } 307 308 static void journal_kill_thread(journal_t *journal) 309 { 310 write_lock(&journal->j_state_lock); 311 journal->j_flags |= JBD2_UNMOUNT; 312 313 while (journal->j_task) { 314 
write_unlock(&journal->j_state_lock); 315 wake_up(&journal->j_wait_commit); 316 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 317 write_lock(&journal->j_state_lock); 318 } 319 write_unlock(&journal->j_state_lock); 320 } 321 322 /* 323 * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. 324 * 325 * Writes a metadata buffer to a given disk block. The actual IO is not 326 * performed but a new buffer_head is constructed which labels the data 327 * to be written with the correct destination disk block. 328 * 329 * Any magic-number escaping which needs to be done will cause a 330 * copy-out here. If the buffer happens to start with the 331 * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the 332 * magic number is only written to the log for descripter blocks. In 333 * this case, we copy the data and replace the first word with 0, and we 334 * return a result code which indicates that this buffer needs to be 335 * marked as an escaped buffer in the corresponding log descriptor 336 * block. The missing word can then be restored when the block is read 337 * during recovery. 338 * 339 * If the source buffer has already been modified by a new transaction 340 * since we took the last commit snapshot, we use the frozen copy of 341 * that data for IO. If we end up using the existing buffer_head's data 342 * for the write, then we have to make sure nobody modifies it while the 343 * IO is in progress. do_get_write_access() handles this. 344 * 345 * The function returns a pointer to the buffer_head to be used for IO. 
346 * 347 * 348 * Return value: 349 * <0: Error 350 * >=0: Finished OK 351 * 352 * On success: 353 * Bit 0 set == escape performed on the data 354 * Bit 1 set == buffer copy-out performed (kfree the data after IO) 355 */ 356 357 int jbd2_journal_write_metadata_buffer(transaction_t *transaction, 358 struct journal_head *jh_in, 359 struct buffer_head **bh_out, 360 sector_t blocknr) 361 { 362 int need_copy_out = 0; 363 int done_copy_out = 0; 364 int do_escape = 0; 365 char *mapped_data; 366 struct buffer_head *new_bh; 367 struct page *new_page; 368 unsigned int new_offset; 369 struct buffer_head *bh_in = jh2bh(jh_in); 370 journal_t *journal = transaction->t_journal; 371 372 /* 373 * The buffer really shouldn't be locked: only the current committing 374 * transaction is allowed to write it, so nobody else is allowed 375 * to do any IO. 376 * 377 * akpm: except if we're journalling data, and write() output is 378 * also part of a shared mapping, and another thread has 379 * decided to launch a writepage() against this buffer. 380 */ 381 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 382 383 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 384 385 /* keep subsequent assertions sane */ 386 atomic_set(&new_bh->b_count, 1); 387 388 jbd_lock_bh_state(bh_in); 389 repeat: 390 /* 391 * If a new transaction has already done a buffer copy-out, then 392 * we use that version of the data for the commit. 393 */ 394 if (jh_in->b_frozen_data) { 395 done_copy_out = 1; 396 new_page = virt_to_page(jh_in->b_frozen_data); 397 new_offset = offset_in_page(jh_in->b_frozen_data); 398 } else { 399 new_page = jh2bh(jh_in)->b_page; 400 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 401 } 402 403 mapped_data = kmap_atomic(new_page); 404 /* 405 * Fire data frozen trigger if data already wasn't frozen. Do this 406 * before checking for escaping, as the trigger may modify the magic 407 * offset. If a copy-out happens afterwards, it will have the correct 408 * data in the buffer. 
409 */ 410 if (!done_copy_out) 411 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset, 412 jh_in->b_triggers); 413 414 /* 415 * Check for escaping 416 */ 417 if (*((__be32 *)(mapped_data + new_offset)) == 418 cpu_to_be32(JBD2_MAGIC_NUMBER)) { 419 need_copy_out = 1; 420 do_escape = 1; 421 } 422 kunmap_atomic(mapped_data); 423 424 /* 425 * Do we need to do a data copy? 426 */ 427 if (need_copy_out && !done_copy_out) { 428 char *tmp; 429 430 jbd_unlock_bh_state(bh_in); 431 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 432 if (!tmp) { 433 brelse(new_bh); 434 return -ENOMEM; 435 } 436 jbd_lock_bh_state(bh_in); 437 if (jh_in->b_frozen_data) { 438 jbd2_free(tmp, bh_in->b_size); 439 goto repeat; 440 } 441 442 jh_in->b_frozen_data = tmp; 443 mapped_data = kmap_atomic(new_page); 444 memcpy(tmp, mapped_data + new_offset, bh_in->b_size); 445 kunmap_atomic(mapped_data); 446 447 new_page = virt_to_page(tmp); 448 new_offset = offset_in_page(tmp); 449 done_copy_out = 1; 450 451 /* 452 * This isn't strictly necessary, as we're using frozen 453 * data for the escaping, but it keeps consistency with 454 * b_frozen_data usage. 455 */ 456 jh_in->b_frozen_triggers = jh_in->b_triggers; 457 } 458 459 /* 460 * Did we need to do an escaping? Now we've done all the 461 * copying, we can finally do so. 462 */ 463 if (do_escape) { 464 mapped_data = kmap_atomic(new_page); 465 *((unsigned int *)(mapped_data + new_offset)) = 0; 466 kunmap_atomic(mapped_data); 467 } 468 469 set_bh_page(new_bh, new_page, new_offset); 470 new_bh->b_size = bh_in->b_size; 471 new_bh->b_bdev = journal->j_dev; 472 new_bh->b_blocknr = blocknr; 473 new_bh->b_private = bh_in; 474 set_buffer_mapped(new_bh); 475 set_buffer_dirty(new_bh); 476 477 *bh_out = new_bh; 478 479 /* 480 * The to-be-written buffer needs to get moved to the io queue, 481 * and the original buffer whose contents we are shadowing or 482 * copying is moved to the transaction's shadow queue. 
483 */ 484 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 485 spin_lock(&journal->j_list_lock); 486 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 487 spin_unlock(&journal->j_list_lock); 488 set_buffer_shadow(bh_in); 489 jbd_unlock_bh_state(bh_in); 490 491 return do_escape | (done_copy_out << 1); 492 } 493 494 /* 495 * Allocation code for the journal file. Manage the space left in the 496 * journal, so that we can begin checkpointing when appropriate. 497 */ 498 499 /* 500 * Called with j_state_lock locked for writing. 501 * Returns true if a transaction commit was started. 502 */ 503 int __jbd2_log_start_commit(journal_t *journal, tid_t target) 504 { 505 /* Return if the txn has already requested to be committed */ 506 if (journal->j_commit_request == target) 507 return 0; 508 509 /* 510 * The only transaction we can possibly wait upon is the 511 * currently running transaction (if it exists). Otherwise, 512 * the target tid must be an old one. 513 */ 514 if (journal->j_running_transaction && 515 journal->j_running_transaction->t_tid == target) { 516 /* 517 * We want a new commit: OK, mark the request and wakeup the 518 * commit thread. We do _not_ do the commit ourselves. 519 */ 520 521 journal->j_commit_request = target; 522 jbd_debug(1, "JBD2: requesting commit %d/%d\n", 523 journal->j_commit_request, 524 journal->j_commit_sequence); 525 journal->j_running_transaction->t_requested = jiffies; 526 wake_up(&journal->j_wait_commit); 527 return 1; 528 } else if (!tid_geq(journal->j_commit_request, target)) 529 /* This should never happen, but if it does, preserve 530 the evidence before kjournald goes into a loop and 531 increments j_commit_sequence beyond all recognition. */ 532 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", 533 journal->j_commit_request, 534 journal->j_commit_sequence, 535 target, journal->j_running_transaction ? 
536 journal->j_running_transaction->t_tid : 0); 537 return 0; 538 } 539 540 int jbd2_log_start_commit(journal_t *journal, tid_t tid) 541 { 542 int ret; 543 544 write_lock(&journal->j_state_lock); 545 ret = __jbd2_log_start_commit(journal, tid); 546 write_unlock(&journal->j_state_lock); 547 return ret; 548 } 549 550 /* 551 * Force and wait any uncommitted transactions. We can only force the running 552 * transaction if we don't have an active handle, otherwise, we will deadlock. 553 * Returns: <0 in case of error, 554 * 0 if nothing to commit, 555 * 1 if transaction was successfully committed. 556 */ 557 static int __jbd2_journal_force_commit(journal_t *journal) 558 { 559 transaction_t *transaction = NULL; 560 tid_t tid; 561 int need_to_start = 0, ret = 0; 562 563 read_lock(&journal->j_state_lock); 564 if (journal->j_running_transaction && !current->journal_info) { 565 transaction = journal->j_running_transaction; 566 if (!tid_geq(journal->j_commit_request, transaction->t_tid)) 567 need_to_start = 1; 568 } else if (journal->j_committing_transaction) 569 transaction = journal->j_committing_transaction; 570 571 if (!transaction) { 572 /* Nothing to commit */ 573 read_unlock(&journal->j_state_lock); 574 return 0; 575 } 576 tid = transaction->t_tid; 577 read_unlock(&journal->j_state_lock); 578 if (need_to_start) 579 jbd2_log_start_commit(journal, tid); 580 ret = jbd2_log_wait_commit(journal, tid); 581 if (!ret) 582 ret = 1; 583 584 return ret; 585 } 586 587 /** 588 * Force and wait upon a commit if the calling process is not within 589 * transaction. This is used for forcing out undo-protected data which contains 590 * bitmaps, when the fs is running out of space. 591 * 592 * @journal: journal to force 593 * Returns true if progress was made. 
594 */ 595 int jbd2_journal_force_commit_nested(journal_t *journal) 596 { 597 int ret; 598 599 ret = __jbd2_journal_force_commit(journal); 600 return ret > 0; 601 } 602 603 /** 604 * int journal_force_commit() - force any uncommitted transactions 605 * @journal: journal to force 606 * 607 * Caller want unconditional commit. We can only force the running transaction 608 * if we don't have an active handle, otherwise, we will deadlock. 609 */ 610 int jbd2_journal_force_commit(journal_t *journal) 611 { 612 int ret; 613 614 J_ASSERT(!current->journal_info); 615 ret = __jbd2_journal_force_commit(journal); 616 if (ret > 0) 617 ret = 0; 618 return ret; 619 } 620 621 /* 622 * Start a commit of the current running transaction (if any). Returns true 623 * if a transaction is going to be committed (or is currently already 624 * committing), and fills its tid in at *ptid 625 */ 626 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 627 { 628 int ret = 0; 629 630 write_lock(&journal->j_state_lock); 631 if (journal->j_running_transaction) { 632 tid_t tid = journal->j_running_transaction->t_tid; 633 634 __jbd2_log_start_commit(journal, tid); 635 /* There's a running transaction and we've just made sure 636 * it's commit has been scheduled. */ 637 if (ptid) 638 *ptid = tid; 639 ret = 1; 640 } else if (journal->j_committing_transaction) { 641 /* 642 * If commit has been started, then we have to wait for 643 * completion of that transaction. 644 */ 645 if (ptid) 646 *ptid = journal->j_committing_transaction->t_tid; 647 ret = 1; 648 } 649 write_unlock(&journal->j_state_lock); 650 return ret; 651 } 652 653 /* 654 * Return 1 if a given transaction has not yet sent barrier request 655 * connected with a transaction commit. If 0 is returned, transaction 656 * may or may not have sent the barrier. Used to avoid sending barrier 657 * twice in common cases. 
658 */ 659 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) 660 { 661 int ret = 0; 662 transaction_t *commit_trans; 663 664 if (!(journal->j_flags & JBD2_BARRIER)) 665 return 0; 666 read_lock(&journal->j_state_lock); 667 /* Transaction already committed? */ 668 if (tid_geq(journal->j_commit_sequence, tid)) 669 goto out; 670 commit_trans = journal->j_committing_transaction; 671 if (!commit_trans || commit_trans->t_tid != tid) { 672 ret = 1; 673 goto out; 674 } 675 /* 676 * Transaction is being committed and we already proceeded to 677 * submitting a flush to fs partition? 678 */ 679 if (journal->j_fs_dev != journal->j_dev) { 680 if (!commit_trans->t_need_data_flush || 681 commit_trans->t_state >= T_COMMIT_DFLUSH) 682 goto out; 683 } else { 684 if (commit_trans->t_state >= T_COMMIT_JFLUSH) 685 goto out; 686 } 687 ret = 1; 688 out: 689 read_unlock(&journal->j_state_lock); 690 return ret; 691 } 692 EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); 693 694 /* 695 * Wait for a specified commit to complete. 696 * The caller may not hold the journal lock. 697 */ 698 int jbd2_log_wait_commit(journal_t *journal, tid_t tid) 699 { 700 int err = 0; 701 702 read_lock(&journal->j_state_lock); 703 #ifdef CONFIG_PROVE_LOCKING 704 /* 705 * Some callers make sure transaction is already committing and in that 706 * case we cannot block on open handles anymore. So don't warn in that 707 * case. 
708 */ 709 if (tid_gt(tid, journal->j_commit_sequence) && 710 (!journal->j_committing_transaction || 711 journal->j_committing_transaction->t_tid != tid)) { 712 read_unlock(&journal->j_state_lock); 713 jbd2_might_wait_for_commit(journal); 714 read_lock(&journal->j_state_lock); 715 } 716 #endif 717 #ifdef CONFIG_JBD2_DEBUG 718 if (!tid_geq(journal->j_commit_request, tid)) { 719 printk(KERN_ERR 720 "%s: error: j_commit_request=%d, tid=%d\n", 721 __func__, journal->j_commit_request, tid); 722 } 723 #endif 724 while (tid_gt(tid, journal->j_commit_sequence)) { 725 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", 726 tid, journal->j_commit_sequence); 727 read_unlock(&journal->j_state_lock); 728 wake_up(&journal->j_wait_commit); 729 wait_event(journal->j_wait_done_commit, 730 !tid_gt(tid, journal->j_commit_sequence)); 731 read_lock(&journal->j_state_lock); 732 } 733 read_unlock(&journal->j_state_lock); 734 735 if (unlikely(is_journal_aborted(journal))) 736 err = -EIO; 737 return err; 738 } 739 740 /* Return 1 when transaction with given tid has already committed. */ 741 int jbd2_transaction_committed(journal_t *journal, tid_t tid) 742 { 743 int ret = 1; 744 745 read_lock(&journal->j_state_lock); 746 if (journal->j_running_transaction && 747 journal->j_running_transaction->t_tid == tid) 748 ret = 0; 749 if (journal->j_committing_transaction && 750 journal->j_committing_transaction->t_tid == tid) 751 ret = 0; 752 read_unlock(&journal->j_state_lock); 753 return ret; 754 } 755 EXPORT_SYMBOL(jbd2_transaction_committed); 756 757 /* 758 * When this function returns the transaction corresponding to tid 759 * will be completed. If the transaction has currently running, start 760 * committing that transaction before waiting for it to complete. If 761 * the transaction id is stale, it is by definition already completed, 762 * so just return SUCCESS. 
763 */ 764 int jbd2_complete_transaction(journal_t *journal, tid_t tid) 765 { 766 int need_to_wait = 1; 767 768 read_lock(&journal->j_state_lock); 769 if (journal->j_running_transaction && 770 journal->j_running_transaction->t_tid == tid) { 771 if (journal->j_commit_request != tid) { 772 /* transaction not yet started, so request it */ 773 read_unlock(&journal->j_state_lock); 774 jbd2_log_start_commit(journal, tid); 775 goto wait_commit; 776 } 777 } else if (!(journal->j_committing_transaction && 778 journal->j_committing_transaction->t_tid == tid)) 779 need_to_wait = 0; 780 read_unlock(&journal->j_state_lock); 781 if (!need_to_wait) 782 return 0; 783 wait_commit: 784 return jbd2_log_wait_commit(journal, tid); 785 } 786 EXPORT_SYMBOL(jbd2_complete_transaction); 787 788 /* 789 * Log buffer allocation routines: 790 */ 791 792 int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) 793 { 794 unsigned long blocknr; 795 796 write_lock(&journal->j_state_lock); 797 J_ASSERT(journal->j_free > 1); 798 799 blocknr = journal->j_head; 800 journal->j_head++; 801 journal->j_free--; 802 if (journal->j_head == journal->j_last) 803 journal->j_head = journal->j_first; 804 write_unlock(&journal->j_state_lock); 805 return jbd2_journal_bmap(journal, blocknr, retp); 806 } 807 808 /* 809 * Conversion of logical to physical block numbers for the journal 810 * 811 * On external journals the journal blocks are identity-mapped, so 812 * this is a no-op. If needed, we can use j_blk_offset - everything is 813 * ready. 
814 */ 815 int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, 816 unsigned long long *retp) 817 { 818 int err = 0; 819 unsigned long long ret; 820 821 if (journal->j_inode) { 822 ret = bmap(journal->j_inode, blocknr); 823 if (ret) 824 *retp = ret; 825 else { 826 printk(KERN_ALERT "%s: journal block not found " 827 "at offset %lu on %s\n", 828 __func__, blocknr, journal->j_devname); 829 err = -EIO; 830 __journal_abort_soft(journal, err); 831 } 832 } else { 833 *retp = blocknr; /* +journal->j_blk_offset */ 834 } 835 return err; 836 } 837 838 /* 839 * We play buffer_head aliasing tricks to write data/metadata blocks to 840 * the journal without copying their contents, but for journal 841 * descriptor blocks we do need to generate bona fide buffers. 842 * 843 * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying 844 * the buffer's contents they really should run flush_dcache_page(bh->b_page). 845 * But we don't bother doing that, so there will be coherency problems with 846 * mmaps of blockdevs which hold live JBD-controlled filesystems. 
847 */ 848 struct buffer_head * 849 jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type) 850 { 851 journal_t *journal = transaction->t_journal; 852 struct buffer_head *bh; 853 unsigned long long blocknr; 854 journal_header_t *header; 855 int err; 856 857 err = jbd2_journal_next_log_block(journal, &blocknr); 858 859 if (err) 860 return NULL; 861 862 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 863 if (!bh) 864 return NULL; 865 lock_buffer(bh); 866 memset(bh->b_data, 0, journal->j_blocksize); 867 header = (journal_header_t *)bh->b_data; 868 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 869 header->h_blocktype = cpu_to_be32(type); 870 header->h_sequence = cpu_to_be32(transaction->t_tid); 871 set_buffer_uptodate(bh); 872 unlock_buffer(bh); 873 BUFFER_TRACE(bh, "return this buffer"); 874 return bh; 875 } 876 877 void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh) 878 { 879 struct jbd2_journal_block_tail *tail; 880 __u32 csum; 881 882 if (!jbd2_journal_has_csum_v2or3(j)) 883 return; 884 885 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - 886 sizeof(struct jbd2_journal_block_tail)); 887 tail->t_checksum = 0; 888 csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); 889 tail->t_checksum = cpu_to_be32(csum); 890 } 891 892 /* 893 * Return tid of the oldest transaction in the journal and block in the journal 894 * where the transaction starts. 895 * 896 * If the journal is now empty, return which will be the next transaction ID 897 * we will write and where will that transaction start. 898 * 899 * The return value is 0 if journal tail cannot be pushed any further, 1 if 900 * it can. 
901 */ 902 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 903 unsigned long *block) 904 { 905 transaction_t *transaction; 906 int ret; 907 908 read_lock(&journal->j_state_lock); 909 spin_lock(&journal->j_list_lock); 910 transaction = journal->j_checkpoint_transactions; 911 if (transaction) { 912 *tid = transaction->t_tid; 913 *block = transaction->t_log_start; 914 } else if ((transaction = journal->j_committing_transaction) != NULL) { 915 *tid = transaction->t_tid; 916 *block = transaction->t_log_start; 917 } else if ((transaction = journal->j_running_transaction) != NULL) { 918 *tid = transaction->t_tid; 919 *block = journal->j_head; 920 } else { 921 *tid = journal->j_transaction_sequence; 922 *block = journal->j_head; 923 } 924 ret = tid_gt(*tid, journal->j_tail_sequence); 925 spin_unlock(&journal->j_list_lock); 926 read_unlock(&journal->j_state_lock); 927 928 return ret; 929 } 930 931 /* 932 * Update information in journal structure and in on disk journal superblock 933 * about log tail. This function does not check whether information passed in 934 * really pushes log tail further. It's responsibility of the caller to make 935 * sure provided log tail information is valid (e.g. by holding 936 * j_checkpoint_mutex all the time between computing log tail and calling this 937 * function as is the case with jbd2_cleanup_journal_tail()). 938 * 939 * Requires j_checkpoint_mutex 940 */ 941 int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 942 { 943 unsigned long freed; 944 int ret; 945 946 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 947 948 /* 949 * We cannot afford for write to remain in drive's caches since as 950 * soon as we update j_tail, next transaction can start reusing journal 951 * space and if we lose sb update during power failure we'd replay 952 * old transaction with possibly newly overwritten data. 
953 */ 954 ret = jbd2_journal_update_sb_log_tail(journal, tid, block, 955 REQ_SYNC | REQ_FUA); 956 if (ret) 957 goto out; 958 959 write_lock(&journal->j_state_lock); 960 freed = block - journal->j_tail; 961 if (block < journal->j_tail) 962 freed += journal->j_last - journal->j_first; 963 964 trace_jbd2_update_log_tail(journal, tid, block, freed); 965 jbd_debug(1, 966 "Cleaning journal tail from %d to %d (offset %lu), " 967 "freeing %lu\n", 968 journal->j_tail_sequence, tid, block, freed); 969 970 journal->j_free += freed; 971 journal->j_tail_sequence = tid; 972 journal->j_tail = block; 973 write_unlock(&journal->j_state_lock); 974 975 out: 976 return ret; 977 } 978 979 /* 980 * This is a variaon of __jbd2_update_log_tail which checks for validity of 981 * provided log tail and locks j_checkpoint_mutex. So it is safe against races 982 * with other threads updating log tail. 983 */ 984 void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 985 { 986 mutex_lock_io(&journal->j_checkpoint_mutex); 987 if (tid_gt(tid, journal->j_tail_sequence)) 988 __jbd2_update_log_tail(journal, tid, block); 989 mutex_unlock(&journal->j_checkpoint_mutex); 990 } 991 992 struct jbd2_stats_proc_session { 993 journal_t *journal; 994 struct transaction_stats_s *stats; 995 int start; 996 int max; 997 }; 998 999 static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) 1000 { 1001 return *pos ? 
NULL : SEQ_START_TOKEN; 1002 } 1003 1004 static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) 1005 { 1006 return NULL; 1007 } 1008 1009 static int jbd2_seq_info_show(struct seq_file *seq, void *v) 1010 { 1011 struct jbd2_stats_proc_session *s = seq->private; 1012 1013 if (v != SEQ_START_TOKEN) 1014 return 0; 1015 seq_printf(seq, "%lu transactions (%lu requested), " 1016 "each up to %u blocks\n", 1017 s->stats->ts_tid, s->stats->ts_requested, 1018 s->journal->j_max_transaction_buffers); 1019 if (s->stats->ts_tid == 0) 1020 return 0; 1021 seq_printf(seq, "average: \n %ums waiting for transaction\n", 1022 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 1023 seq_printf(seq, " %ums request delay\n", 1024 (s->stats->ts_requested == 0) ? 0 : 1025 jiffies_to_msecs(s->stats->run.rs_request_delay / 1026 s->stats->ts_requested)); 1027 seq_printf(seq, " %ums running transaction\n", 1028 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 1029 seq_printf(seq, " %ums transaction was being locked\n", 1030 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid)); 1031 seq_printf(seq, " %ums flushing data (in ordered mode)\n", 1032 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid)); 1033 seq_printf(seq, " %ums logging transaction\n", 1034 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid)); 1035 seq_printf(seq, " %lluus average transaction commit time\n", 1036 div_u64(s->journal->j_average_commit_time, 1000)); 1037 seq_printf(seq, " %lu handles per transaction\n", 1038 s->stats->run.rs_handle_count / s->stats->ts_tid); 1039 seq_printf(seq, " %lu blocks per transaction\n", 1040 s->stats->run.rs_blocks / s->stats->ts_tid); 1041 seq_printf(seq, " %lu logged blocks per transaction\n", 1042 s->stats->run.rs_blocks_logged / s->stats->ts_tid); 1043 return 0; 1044 } 1045 1046 static void jbd2_seq_info_stop(struct seq_file *seq, void *v) 1047 { 1048 } 1049 1050 static const struct seq_operations jbd2_seq_info_ops = 
{ 1051 .start = jbd2_seq_info_start, 1052 .next = jbd2_seq_info_next, 1053 .stop = jbd2_seq_info_stop, 1054 .show = jbd2_seq_info_show, 1055 }; 1056 1057 static int jbd2_seq_info_open(struct inode *inode, struct file *file) 1058 { 1059 journal_t *journal = PDE_DATA(inode); 1060 struct jbd2_stats_proc_session *s; 1061 int rc, size; 1062 1063 s = kmalloc(sizeof(*s), GFP_KERNEL); 1064 if (s == NULL) 1065 return -ENOMEM; 1066 size = sizeof(struct transaction_stats_s); 1067 s->stats = kmalloc(size, GFP_KERNEL); 1068 if (s->stats == NULL) { 1069 kfree(s); 1070 return -ENOMEM; 1071 } 1072 spin_lock(&journal->j_history_lock); 1073 memcpy(s->stats, &journal->j_stats, size); 1074 s->journal = journal; 1075 spin_unlock(&journal->j_history_lock); 1076 1077 rc = seq_open(file, &jbd2_seq_info_ops); 1078 if (rc == 0) { 1079 struct seq_file *m = file->private_data; 1080 m->private = s; 1081 } else { 1082 kfree(s->stats); 1083 kfree(s); 1084 } 1085 return rc; 1086 1087 } 1088 1089 static int jbd2_seq_info_release(struct inode *inode, struct file *file) 1090 { 1091 struct seq_file *seq = file->private_data; 1092 struct jbd2_stats_proc_session *s = seq->private; 1093 kfree(s->stats); 1094 kfree(s); 1095 return seq_release(inode, file); 1096 } 1097 1098 static const struct file_operations jbd2_seq_info_fops = { 1099 .owner = THIS_MODULE, 1100 .open = jbd2_seq_info_open, 1101 .read = seq_read, 1102 .llseek = seq_lseek, 1103 .release = jbd2_seq_info_release, 1104 }; 1105 1106 static struct proc_dir_entry *proc_jbd2_stats; 1107 1108 static void jbd2_stats_proc_init(journal_t *journal) 1109 { 1110 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); 1111 if (journal->j_proc_entry) { 1112 proc_create_data("info", S_IRUGO, journal->j_proc_entry, 1113 &jbd2_seq_info_fops, journal); 1114 } 1115 } 1116 1117 static void jbd2_stats_proc_exit(journal_t *journal) 1118 { 1119 remove_proc_entry("info", journal->j_proc_entry); 1120 remove_proc_entry(journal->j_devname, 
proc_jbd2_stats); 1121 } 1122 1123 /* 1124 * Management for journal control blocks: functions to create and 1125 * destroy journal_t structures, and to initialise and read existing 1126 * journal blocks from disk. */ 1127 1128 /* First: create and setup a journal_t object in memory. We initialise 1129 * very few fields yet: that has to wait until we have created the 1130 * journal structures from from scratch, or loaded them from disk. */ 1131 1132 static journal_t *journal_init_common(struct block_device *bdev, 1133 struct block_device *fs_dev, 1134 unsigned long long start, int len, int blocksize) 1135 { 1136 static struct lock_class_key jbd2_trans_commit_key; 1137 journal_t *journal; 1138 int err; 1139 struct buffer_head *bh; 1140 int n; 1141 1142 journal = kzalloc(sizeof(*journal), GFP_KERNEL); 1143 if (!journal) 1144 return NULL; 1145 1146 init_waitqueue_head(&journal->j_wait_transaction_locked); 1147 init_waitqueue_head(&journal->j_wait_done_commit); 1148 init_waitqueue_head(&journal->j_wait_commit); 1149 init_waitqueue_head(&journal->j_wait_updates); 1150 init_waitqueue_head(&journal->j_wait_reserved); 1151 mutex_init(&journal->j_barrier); 1152 mutex_init(&journal->j_checkpoint_mutex); 1153 spin_lock_init(&journal->j_revoke_lock); 1154 spin_lock_init(&journal->j_list_lock); 1155 rwlock_init(&journal->j_state_lock); 1156 1157 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 1158 journal->j_min_batch_time = 0; 1159 journal->j_max_batch_time = 15000; /* 15ms */ 1160 atomic_set(&journal->j_reserved_credits, 0); 1161 1162 /* The journal is marked for error until we succeed with recovery! */ 1163 journal->j_flags = JBD2_ABORT; 1164 1165 /* Set up a default-sized revoke table for the new mount. 
*/ 1166 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 1167 if (err) 1168 goto err_cleanup; 1169 1170 spin_lock_init(&journal->j_history_lock); 1171 1172 lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", 1173 &jbd2_trans_commit_key, 0); 1174 1175 /* journal descriptor can store up to n blocks -bzzz */ 1176 journal->j_blocksize = blocksize; 1177 journal->j_dev = bdev; 1178 journal->j_fs_dev = fs_dev; 1179 journal->j_blk_offset = start; 1180 journal->j_maxlen = len; 1181 n = journal->j_blocksize / sizeof(journal_block_tag_t); 1182 journal->j_wbufsize = n; 1183 journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), 1184 GFP_KERNEL); 1185 if (!journal->j_wbuf) 1186 goto err_cleanup; 1187 1188 bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); 1189 if (!bh) { 1190 pr_err("%s: Cannot get buffer for journal superblock\n", 1191 __func__); 1192 goto err_cleanup; 1193 } 1194 journal->j_sb_buffer = bh; 1195 journal->j_superblock = (journal_superblock_t *)bh->b_data; 1196 1197 return journal; 1198 1199 err_cleanup: 1200 kfree(journal->j_wbuf); 1201 jbd2_journal_destroy_revoke(journal); 1202 kfree(journal); 1203 return NULL; 1204 } 1205 1206 /* jbd2_journal_init_dev and jbd2_journal_init_inode: 1207 * 1208 * Create a journal structure assigned some fixed set of disk blocks to 1209 * the journal. We don't actually touch those disk blocks yet, but we 1210 * need to set up all of the mapping information to tell the journaling 1211 * system where the journal blocks are. 1212 * 1213 */ 1214 1215 /** 1216 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure 1217 * @bdev: Block device on which to create the journal 1218 * @fs_dev: Device which hold journalled filesystem for this journal. 1219 * @start: Block nr Start of journal. 1220 * @len: Length of the journal in blocks. 
1221 * @blocksize: blocksize of journalling device 1222 * 1223 * Returns: a newly created journal_t * 1224 * 1225 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1226 * range of blocks on an arbitrary block device. 1227 * 1228 */ 1229 journal_t *jbd2_journal_init_dev(struct block_device *bdev, 1230 struct block_device *fs_dev, 1231 unsigned long long start, int len, int blocksize) 1232 { 1233 journal_t *journal; 1234 1235 journal = journal_init_common(bdev, fs_dev, start, len, blocksize); 1236 if (!journal) 1237 return NULL; 1238 1239 bdevname(journal->j_dev, journal->j_devname); 1240 strreplace(journal->j_devname, '/', '!'); 1241 jbd2_stats_proc_init(journal); 1242 1243 return journal; 1244 } 1245 1246 /** 1247 * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. 1248 * @inode: An inode to create the journal in 1249 * 1250 * jbd2_journal_init_inode creates a journal which maps an on-disk inode as 1251 * the journal. The inode must exist already, must support bmap() and 1252 * must have all data blocks preallocated. 
1253 */ 1254 journal_t *jbd2_journal_init_inode(struct inode *inode) 1255 { 1256 journal_t *journal; 1257 char *p; 1258 unsigned long long blocknr; 1259 1260 blocknr = bmap(inode, 0); 1261 if (!blocknr) { 1262 pr_err("%s: Cannot locate journal superblock\n", 1263 __func__); 1264 return NULL; 1265 } 1266 1267 jbd_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n", 1268 inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, 1269 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 1270 1271 journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev, 1272 blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits, 1273 inode->i_sb->s_blocksize); 1274 if (!journal) 1275 return NULL; 1276 1277 journal->j_inode = inode; 1278 bdevname(journal->j_dev, journal->j_devname); 1279 p = strreplace(journal->j_devname, '/', '!'); 1280 sprintf(p, "-%lu", journal->j_inode->i_ino); 1281 jbd2_stats_proc_init(journal); 1282 1283 return journal; 1284 } 1285 1286 /* 1287 * If the journal init or create aborts, we need to mark the journal 1288 * superblock as being NULL to prevent the journal destroy from writing 1289 * back a bogus superblock. 1290 */ 1291 static void journal_fail_superblock (journal_t *journal) 1292 { 1293 struct buffer_head *bh = journal->j_sb_buffer; 1294 brelse(bh); 1295 journal->j_sb_buffer = NULL; 1296 } 1297 1298 /* 1299 * Given a journal_t structure, initialise the various fields for 1300 * startup of a new journaling session. We use this both when creating 1301 * a journal, and after recovering an old journal to reset it for 1302 * subsequent use. 
1303 */ 1304 1305 static int journal_reset(journal_t *journal) 1306 { 1307 journal_superblock_t *sb = journal->j_superblock; 1308 unsigned long long first, last; 1309 1310 first = be32_to_cpu(sb->s_first); 1311 last = be32_to_cpu(sb->s_maxlen); 1312 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1313 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", 1314 first, last); 1315 journal_fail_superblock(journal); 1316 return -EINVAL; 1317 } 1318 1319 journal->j_first = first; 1320 journal->j_last = last; 1321 1322 journal->j_head = first; 1323 journal->j_tail = first; 1324 journal->j_free = last - first; 1325 1326 journal->j_tail_sequence = journal->j_transaction_sequence; 1327 journal->j_commit_sequence = journal->j_transaction_sequence - 1; 1328 journal->j_commit_request = journal->j_commit_sequence; 1329 1330 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1331 1332 /* 1333 * As a special case, if the on-disk copy is already marked as needing 1334 * no recovery (s_start == 0), then we can safely defer the superblock 1335 * update until the next commit by setting JBD2_FLUSHED. This avoids 1336 * attempting a write to a potential-readonly device. 1337 */ 1338 if (sb->s_start == 0) { 1339 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " 1340 "(start %ld, seq %d, errno %d)\n", 1341 journal->j_tail, journal->j_tail_sequence, 1342 journal->j_errno); 1343 journal->j_flags |= JBD2_FLUSHED; 1344 } else { 1345 /* Lock here to make assertions happy... */ 1346 mutex_lock_io(&journal->j_checkpoint_mutex); 1347 /* 1348 * Update log tail information. We use REQ_FUA since new 1349 * transaction will start reusing journal space and so we 1350 * must make sure information about current log tail is on 1351 * disk before that. 
1352 */ 1353 jbd2_journal_update_sb_log_tail(journal, 1354 journal->j_tail_sequence, 1355 journal->j_tail, 1356 REQ_SYNC | REQ_FUA); 1357 mutex_unlock(&journal->j_checkpoint_mutex); 1358 } 1359 return jbd2_journal_start_thread(journal); 1360 } 1361 1362 static int jbd2_write_superblock(journal_t *journal, int write_flags) 1363 { 1364 struct buffer_head *bh = journal->j_sb_buffer; 1365 journal_superblock_t *sb = journal->j_superblock; 1366 int ret; 1367 1368 trace_jbd2_write_superblock(journal, write_flags); 1369 if (!(journal->j_flags & JBD2_BARRIER)) 1370 write_flags &= ~(REQ_FUA | REQ_PREFLUSH); 1371 lock_buffer(bh); 1372 if (buffer_write_io_error(bh)) { 1373 /* 1374 * Oh, dear. A previous attempt to write the journal 1375 * superblock failed. This could happen because the 1376 * USB device was yanked out. Or it could happen to 1377 * be a transient write error and maybe the block will 1378 * be remapped. Nothing we can do but to retry the 1379 * write and hope for the best. 1380 */ 1381 printk(KERN_ERR "JBD2: previous I/O error detected " 1382 "for journal superblock update for %s.\n", 1383 journal->j_devname); 1384 clear_buffer_write_io_error(bh); 1385 set_buffer_uptodate(bh); 1386 } 1387 jbd2_superblock_csum_set(journal, sb); 1388 get_bh(bh); 1389 bh->b_end_io = end_buffer_write_sync; 1390 ret = submit_bh(REQ_OP_WRITE, write_flags, bh); 1391 wait_on_buffer(bh); 1392 if (buffer_write_io_error(bh)) { 1393 clear_buffer_write_io_error(bh); 1394 set_buffer_uptodate(bh); 1395 ret = -EIO; 1396 } 1397 if (ret) { 1398 printk(KERN_ERR "JBD2: Error %d detected when updating " 1399 "journal superblock for %s.\n", ret, 1400 journal->j_devname); 1401 jbd2_journal_abort(journal, ret); 1402 } 1403 1404 return ret; 1405 } 1406 1407 /** 1408 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 1409 * @journal: The journal to update. 
1410 * @tail_tid: TID of the new transaction at the tail of the log 1411 * @tail_block: The first block of the transaction at the tail of the log 1412 * @write_op: With which operation should we write the journal sb 1413 * 1414 * Update a journal's superblock information about log tail and write it to 1415 * disk, waiting for the IO to complete. 1416 */ 1417 int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, 1418 unsigned long tail_block, int write_op) 1419 { 1420 journal_superblock_t *sb = journal->j_superblock; 1421 int ret; 1422 1423 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1424 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 1425 tail_block, tail_tid); 1426 1427 sb->s_sequence = cpu_to_be32(tail_tid); 1428 sb->s_start = cpu_to_be32(tail_block); 1429 1430 ret = jbd2_write_superblock(journal, write_op); 1431 if (ret) 1432 goto out; 1433 1434 /* Log is no longer empty */ 1435 write_lock(&journal->j_state_lock); 1436 WARN_ON(!sb->s_sequence); 1437 journal->j_flags &= ~JBD2_FLUSHED; 1438 write_unlock(&journal->j_state_lock); 1439 1440 out: 1441 return ret; 1442 } 1443 1444 /** 1445 * jbd2_mark_journal_empty() - Mark on disk journal as empty. 1446 * @journal: The journal to update. 1447 * @write_op: With which operation should we write the journal sb 1448 * 1449 * Update a journal's dynamic superblock fields to show that journal is empty. 1450 * Write updated superblock to disk waiting for IO to complete. 1451 */ 1452 static void jbd2_mark_journal_empty(journal_t *journal, int write_op) 1453 { 1454 journal_superblock_t *sb = journal->j_superblock; 1455 1456 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1457 read_lock(&journal->j_state_lock); 1458 /* Is it already empty? 
*/ 1459 if (sb->s_start == 0) { 1460 read_unlock(&journal->j_state_lock); 1461 return; 1462 } 1463 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1464 journal->j_tail_sequence); 1465 1466 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1467 sb->s_start = cpu_to_be32(0); 1468 read_unlock(&journal->j_state_lock); 1469 1470 jbd2_write_superblock(journal, write_op); 1471 1472 /* Log is no longer empty */ 1473 write_lock(&journal->j_state_lock); 1474 journal->j_flags |= JBD2_FLUSHED; 1475 write_unlock(&journal->j_state_lock); 1476 } 1477 1478 1479 /** 1480 * jbd2_journal_update_sb_errno() - Update error in the journal. 1481 * @journal: The journal to update. 1482 * 1483 * Update a journal's errno. Write updated superblock to disk waiting for IO 1484 * to complete. 1485 */ 1486 void jbd2_journal_update_sb_errno(journal_t *journal) 1487 { 1488 journal_superblock_t *sb = journal->j_superblock; 1489 1490 read_lock(&journal->j_state_lock); 1491 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", 1492 journal->j_errno); 1493 sb->s_errno = cpu_to_be32(journal->j_errno); 1494 read_unlock(&journal->j_state_lock); 1495 1496 jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); 1497 } 1498 EXPORT_SYMBOL(jbd2_journal_update_sb_errno); 1499 1500 /* 1501 * Read the superblock for a given journal, performing initial 1502 * validation of the format. 
1503 */ 1504 static int journal_get_superblock(journal_t *journal) 1505 { 1506 struct buffer_head *bh; 1507 journal_superblock_t *sb; 1508 int err = -EIO; 1509 1510 bh = journal->j_sb_buffer; 1511 1512 J_ASSERT(bh != NULL); 1513 if (!buffer_uptodate(bh)) { 1514 ll_rw_block(REQ_OP_READ, 0, 1, &bh); 1515 wait_on_buffer(bh); 1516 if (!buffer_uptodate(bh)) { 1517 printk(KERN_ERR 1518 "JBD2: IO error reading journal superblock\n"); 1519 goto out; 1520 } 1521 } 1522 1523 if (buffer_verified(bh)) 1524 return 0; 1525 1526 sb = journal->j_superblock; 1527 1528 err = -EINVAL; 1529 1530 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1531 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1532 printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); 1533 goto out; 1534 } 1535 1536 switch(be32_to_cpu(sb->s_header.h_blocktype)) { 1537 case JBD2_SUPERBLOCK_V1: 1538 journal->j_format_version = 1; 1539 break; 1540 case JBD2_SUPERBLOCK_V2: 1541 journal->j_format_version = 2; 1542 break; 1543 default: 1544 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); 1545 goto out; 1546 } 1547 1548 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1549 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1550 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1551 printk(KERN_WARNING "JBD2: journal file too short\n"); 1552 goto out; 1553 } 1554 1555 if (be32_to_cpu(sb->s_first) == 0 || 1556 be32_to_cpu(sb->s_first) >= journal->j_maxlen) { 1557 printk(KERN_WARNING 1558 "JBD2: Invalid start block of journal: %u\n", 1559 be32_to_cpu(sb->s_first)); 1560 goto out; 1561 } 1562 1563 if (jbd2_has_feature_csum2(journal) && 1564 jbd2_has_feature_csum3(journal)) { 1565 /* Can't have checksum v2 and v3 at the same time! 
*/ 1566 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " 1567 "at the same time!\n"); 1568 goto out; 1569 } 1570 1571 if (jbd2_journal_has_csum_v2or3_feature(journal) && 1572 jbd2_has_feature_checksum(journal)) { 1573 /* Can't have checksum v1 and v2 on at the same time! */ 1574 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " 1575 "at the same time!\n"); 1576 goto out; 1577 } 1578 1579 if (!jbd2_verify_csum_type(journal, sb)) { 1580 printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1581 goto out; 1582 } 1583 1584 /* Load the checksum driver */ 1585 if (jbd2_journal_has_csum_v2or3_feature(journal)) { 1586 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1587 if (IS_ERR(journal->j_chksum_driver)) { 1588 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 1589 err = PTR_ERR(journal->j_chksum_driver); 1590 journal->j_chksum_driver = NULL; 1591 goto out; 1592 } 1593 } 1594 1595 /* Check superblock checksum */ 1596 if (!jbd2_superblock_csum_verify(journal, sb)) { 1597 printk(KERN_ERR "JBD2: journal checksum error\n"); 1598 err = -EFSBADCRC; 1599 goto out; 1600 } 1601 1602 /* Precompute checksum seed for all metadata */ 1603 if (jbd2_journal_has_csum_v2or3(journal)) 1604 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1605 sizeof(sb->s_uuid)); 1606 1607 set_buffer_verified(bh); 1608 1609 return 0; 1610 1611 out: 1612 journal_fail_superblock(journal); 1613 return err; 1614 } 1615 1616 /* 1617 * Load the on-disk journal superblock and read the key fields into the 1618 * journal_t. 
1619 */ 1620 1621 static int load_superblock(journal_t *journal) 1622 { 1623 int err; 1624 journal_superblock_t *sb; 1625 1626 err = journal_get_superblock(journal); 1627 if (err) 1628 return err; 1629 1630 sb = journal->j_superblock; 1631 1632 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 1633 journal->j_tail = be32_to_cpu(sb->s_start); 1634 journal->j_first = be32_to_cpu(sb->s_first); 1635 journal->j_last = be32_to_cpu(sb->s_maxlen); 1636 journal->j_errno = be32_to_cpu(sb->s_errno); 1637 1638 return 0; 1639 } 1640 1641 1642 /** 1643 * int jbd2_journal_load() - Read journal from disk. 1644 * @journal: Journal to act on. 1645 * 1646 * Given a journal_t structure which tells us which disk blocks contain 1647 * a journal, read the journal from disk to initialise the in-memory 1648 * structures. 1649 */ 1650 int jbd2_journal_load(journal_t *journal) 1651 { 1652 int err; 1653 journal_superblock_t *sb; 1654 1655 err = load_superblock(journal); 1656 if (err) 1657 return err; 1658 1659 sb = journal->j_superblock; 1660 /* If this is a V2 superblock, then we have to check the 1661 * features flags on it. */ 1662 1663 if (journal->j_format_version >= 2) { 1664 if ((sb->s_feature_ro_compat & 1665 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1666 (sb->s_feature_incompat & 1667 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1668 printk(KERN_WARNING 1669 "JBD2: Unrecognised features on journal\n"); 1670 return -EINVAL; 1671 } 1672 } 1673 1674 /* 1675 * Create a slab for this blocksize 1676 */ 1677 err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); 1678 if (err) 1679 return err; 1680 1681 /* Let the recovery code check whether it needs to recover any 1682 * data from the journal. 
*/ 1683 if (jbd2_journal_recover(journal)) 1684 goto recovery_error; 1685 1686 if (journal->j_failed_commit) { 1687 printk(KERN_ERR "JBD2: journal transaction %u on %s " 1688 "is corrupt.\n", journal->j_failed_commit, 1689 journal->j_devname); 1690 return -EFSCORRUPTED; 1691 } 1692 1693 /* OK, we've finished with the dynamic journal bits: 1694 * reinitialise the dynamic contents of the superblock in memory 1695 * and reset them on disk. */ 1696 if (journal_reset(journal)) 1697 goto recovery_error; 1698 1699 journal->j_flags &= ~JBD2_ABORT; 1700 journal->j_flags |= JBD2_LOADED; 1701 return 0; 1702 1703 recovery_error: 1704 printk(KERN_WARNING "JBD2: recovery failed\n"); 1705 return -EIO; 1706 } 1707 1708 /** 1709 * void jbd2_journal_destroy() - Release a journal_t structure. 1710 * @journal: Journal to act on. 1711 * 1712 * Release a journal_t structure once it is no longer in use by the 1713 * journaled object. 1714 * Return <0 if we couldn't clean up the journal. 1715 */ 1716 int jbd2_journal_destroy(journal_t *journal) 1717 { 1718 int err = 0; 1719 1720 /* Wait for the commit thread to wake up and die. */ 1721 journal_kill_thread(journal); 1722 1723 /* Force a final log commit */ 1724 if (journal->j_running_transaction) 1725 jbd2_journal_commit_transaction(journal); 1726 1727 /* Force any old transactions to disk */ 1728 1729 /* Totally anal locking here... 
*/ 1730 spin_lock(&journal->j_list_lock); 1731 while (journal->j_checkpoint_transactions != NULL) { 1732 spin_unlock(&journal->j_list_lock); 1733 mutex_lock_io(&journal->j_checkpoint_mutex); 1734 err = jbd2_log_do_checkpoint(journal); 1735 mutex_unlock(&journal->j_checkpoint_mutex); 1736 /* 1737 * If checkpointing failed, just free the buffers to avoid 1738 * looping forever 1739 */ 1740 if (err) { 1741 jbd2_journal_destroy_checkpoint(journal); 1742 spin_lock(&journal->j_list_lock); 1743 break; 1744 } 1745 spin_lock(&journal->j_list_lock); 1746 } 1747 1748 J_ASSERT(journal->j_running_transaction == NULL); 1749 J_ASSERT(journal->j_committing_transaction == NULL); 1750 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1751 spin_unlock(&journal->j_list_lock); 1752 1753 if (journal->j_sb_buffer) { 1754 if (!is_journal_aborted(journal)) { 1755 mutex_lock_io(&journal->j_checkpoint_mutex); 1756 1757 write_lock(&journal->j_state_lock); 1758 journal->j_tail_sequence = 1759 ++journal->j_transaction_sequence; 1760 write_unlock(&journal->j_state_lock); 1761 1762 jbd2_mark_journal_empty(journal, 1763 REQ_SYNC | REQ_PREFLUSH | REQ_FUA); 1764 mutex_unlock(&journal->j_checkpoint_mutex); 1765 } else 1766 err = -EIO; 1767 brelse(journal->j_sb_buffer); 1768 } 1769 1770 if (journal->j_proc_entry) 1771 jbd2_stats_proc_exit(journal); 1772 iput(journal->j_inode); 1773 if (journal->j_revoke) 1774 jbd2_journal_destroy_revoke(journal); 1775 if (journal->j_chksum_driver) 1776 crypto_free_shash(journal->j_chksum_driver); 1777 kfree(journal->j_wbuf); 1778 kfree(journal); 1779 1780 return err; 1781 } 1782 1783 1784 /** 1785 *int jbd2_journal_check_used_features () - Check if features specified are used. 1786 * @journal: Journal to check. 1787 * @compat: bitmask of compatible features 1788 * @ro: bitmask of features that force read-only mount 1789 * @incompat: bitmask of incompatible features 1790 * 1791 * Check whether the journal uses all of a given set of 1792 * features. 
Return true (non-zero) if it does. 1793 **/ 1794 1795 int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, 1796 unsigned long ro, unsigned long incompat) 1797 { 1798 journal_superblock_t *sb; 1799 1800 if (!compat && !ro && !incompat) 1801 return 1; 1802 /* Load journal superblock if it is not loaded yet. */ 1803 if (journal->j_format_version == 0 && 1804 journal_get_superblock(journal) != 0) 1805 return 0; 1806 if (journal->j_format_version == 1) 1807 return 0; 1808 1809 sb = journal->j_superblock; 1810 1811 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && 1812 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && 1813 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) 1814 return 1; 1815 1816 return 0; 1817 } 1818 1819 /** 1820 * int jbd2_journal_check_available_features() - Check feature set in journalling layer 1821 * @journal: Journal to check. 1822 * @compat: bitmask of compatible features 1823 * @ro: bitmask of features that force read-only mount 1824 * @incompat: bitmask of incompatible features 1825 * 1826 * Check whether the journaling code supports the use of 1827 * all of a given set of features on this journal. Return true 1828 * (non-zero) if it can. */ 1829 1830 int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, 1831 unsigned long ro, unsigned long incompat) 1832 { 1833 if (!compat && !ro && !incompat) 1834 return 1; 1835 1836 /* We can support any known requested features iff the 1837 * superblock is in version 2. Otherwise we fail to support any 1838 * extended sb features. 
*/ 1839 1840 if (journal->j_format_version != 2) 1841 return 0; 1842 1843 if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && 1844 (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && 1845 (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) 1846 return 1; 1847 1848 return 0; 1849 } 1850 1851 /** 1852 * int jbd2_journal_set_features () - Mark a given journal feature in the superblock 1853 * @journal: Journal to act on. 1854 * @compat: bitmask of compatible features 1855 * @ro: bitmask of features that force read-only mount 1856 * @incompat: bitmask of incompatible features 1857 * 1858 * Mark a given journal feature as present on the 1859 * superblock. Returns true if the requested features could be set. 1860 * 1861 */ 1862 1863 int jbd2_journal_set_features (journal_t *journal, unsigned long compat, 1864 unsigned long ro, unsigned long incompat) 1865 { 1866 #define INCOMPAT_FEATURE_ON(f) \ 1867 ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) 1868 #define COMPAT_FEATURE_ON(f) \ 1869 ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) 1870 journal_superblock_t *sb; 1871 1872 if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) 1873 return 1; 1874 1875 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1876 return 0; 1877 1878 /* If enabling v2 checksums, turn on v3 instead */ 1879 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { 1880 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; 1881 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; 1882 } 1883 1884 /* Asking for checksumming v3 and v1? Only give them v3. 
*/ 1885 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && 1886 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1887 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1888 1889 jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 1890 compat, ro, incompat); 1891 1892 sb = journal->j_superblock; 1893 1894 /* If enabling v3 checksums, update superblock */ 1895 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 1896 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1897 sb->s_feature_compat &= 1898 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1899 1900 /* Load the checksum driver */ 1901 if (journal->j_chksum_driver == NULL) { 1902 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 1903 0, 0); 1904 if (IS_ERR(journal->j_chksum_driver)) { 1905 printk(KERN_ERR "JBD2: Cannot load crc32c " 1906 "driver.\n"); 1907 journal->j_chksum_driver = NULL; 1908 return 0; 1909 } 1910 1911 /* Precompute checksum seed for all metadata */ 1912 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1913 sb->s_uuid, 1914 sizeof(sb->s_uuid)); 1915 } 1916 } 1917 1918 /* If enabling v1 checksums, downgrade superblock */ 1919 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1920 sb->s_feature_incompat &= 1921 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | 1922 JBD2_FEATURE_INCOMPAT_CSUM_V3); 1923 1924 sb->s_feature_compat |= cpu_to_be32(compat); 1925 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1926 sb->s_feature_incompat |= cpu_to_be32(incompat); 1927 1928 return 1; 1929 #undef COMPAT_FEATURE_ON 1930 #undef INCOMPAT_FEATURE_ON 1931 } 1932 1933 /* 1934 * jbd2_journal_clear_features () - Clear a given journal feature in the 1935 * superblock 1936 * @journal: Journal to act on. 1937 * @compat: bitmask of compatible features 1938 * @ro: bitmask of features that force read-only mount 1939 * @incompat: bitmask of incompatible features 1940 * 1941 * Clear a given journal feature as present on the 1942 * superblock. 
1943 */ 1944 void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, 1945 unsigned long ro, unsigned long incompat) 1946 { 1947 journal_superblock_t *sb; 1948 1949 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", 1950 compat, ro, incompat); 1951 1952 sb = journal->j_superblock; 1953 1954 sb->s_feature_compat &= ~cpu_to_be32(compat); 1955 sb->s_feature_ro_compat &= ~cpu_to_be32(ro); 1956 sb->s_feature_incompat &= ~cpu_to_be32(incompat); 1957 } 1958 EXPORT_SYMBOL(jbd2_journal_clear_features); 1959 1960 /** 1961 * int jbd2_journal_flush () - Flush journal 1962 * @journal: Journal to act on. 1963 * 1964 * Flush all data for a given journal to disk and empty the journal. 1965 * Filesystems can use this when remounting readonly to ensure that 1966 * recovery does not need to happen on remount. 1967 */ 1968 1969 int jbd2_journal_flush(journal_t *journal) 1970 { 1971 int err = 0; 1972 transaction_t *transaction = NULL; 1973 1974 write_lock(&journal->j_state_lock); 1975 1976 /* Force everything buffered to the log... */ 1977 if (journal->j_running_transaction) { 1978 transaction = journal->j_running_transaction; 1979 __jbd2_log_start_commit(journal, transaction->t_tid); 1980 } else if (journal->j_committing_transaction) 1981 transaction = journal->j_committing_transaction; 1982 1983 /* Wait for the log commit to complete... */ 1984 if (transaction) { 1985 tid_t tid = transaction->t_tid; 1986 1987 write_unlock(&journal->j_state_lock); 1988 jbd2_log_wait_commit(journal, tid); 1989 } else { 1990 write_unlock(&journal->j_state_lock); 1991 } 1992 1993 /* ...and flush everything in the log out to disk. 
*/ 1994 spin_lock(&journal->j_list_lock); 1995 while (!err && journal->j_checkpoint_transactions != NULL) { 1996 spin_unlock(&journal->j_list_lock); 1997 mutex_lock_io(&journal->j_checkpoint_mutex); 1998 err = jbd2_log_do_checkpoint(journal); 1999 mutex_unlock(&journal->j_checkpoint_mutex); 2000 spin_lock(&journal->j_list_lock); 2001 } 2002 spin_unlock(&journal->j_list_lock); 2003 2004 if (is_journal_aborted(journal)) 2005 return -EIO; 2006 2007 mutex_lock_io(&journal->j_checkpoint_mutex); 2008 if (!err) { 2009 err = jbd2_cleanup_journal_tail(journal); 2010 if (err < 0) { 2011 mutex_unlock(&journal->j_checkpoint_mutex); 2012 goto out; 2013 } 2014 err = 0; 2015 } 2016 2017 /* Finally, mark the journal as really needing no recovery. 2018 * This sets s_start==0 in the underlying superblock, which is 2019 * the magic code for a fully-recovered superblock. Any future 2020 * commits of data to the journal will restore the current 2021 * s_start value. */ 2022 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); 2023 mutex_unlock(&journal->j_checkpoint_mutex); 2024 write_lock(&journal->j_state_lock); 2025 J_ASSERT(!journal->j_running_transaction); 2026 J_ASSERT(!journal->j_committing_transaction); 2027 J_ASSERT(!journal->j_checkpoint_transactions); 2028 J_ASSERT(journal->j_head == journal->j_tail); 2029 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 2030 write_unlock(&journal->j_state_lock); 2031 out: 2032 return err; 2033 } 2034 2035 /** 2036 * int jbd2_journal_wipe() - Wipe journal contents 2037 * @journal: Journal to act on. 2038 * @write: flag (see below) 2039 * 2040 * Wipe out all of the contents of a journal, safely. This will produce 2041 * a warning if the journal contains any valid recovery information. 2042 * Must be called between journal_init_*() and jbd2_journal_load(). 2043 * 2044 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise 2045 * we merely suppress recovery. 
2046 */ 2047 2048 int jbd2_journal_wipe(journal_t *journal, int write) 2049 { 2050 int err = 0; 2051 2052 J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 2053 2054 err = load_superblock(journal); 2055 if (err) 2056 return err; 2057 2058 if (!journal->j_tail) 2059 goto no_recovery; 2060 2061 printk(KERN_WARNING "JBD2: %s recovery information on journal\n", 2062 write ? "Clearing" : "Ignoring"); 2063 2064 err = jbd2_journal_skip_recovery(journal); 2065 if (write) { 2066 /* Lock to make assertions happy... */ 2067 mutex_lock(&journal->j_checkpoint_mutex); 2068 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); 2069 mutex_unlock(&journal->j_checkpoint_mutex); 2070 } 2071 2072 no_recovery: 2073 return err; 2074 } 2075 2076 /* 2077 * Journal abort has very specific semantics, which we describe 2078 * for journal abort. 2079 * 2080 * Two internal functions, which provide abort to the jbd layer 2081 * itself are here. 2082 */ 2083 2084 /* 2085 * Quick version for internal journal use (doesn't lock the journal). 2086 * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, 2087 * and don't attempt to make any other journal updates. 2088 */ 2089 void __jbd2_journal_abort_hard(journal_t *journal) 2090 { 2091 transaction_t *transaction; 2092 2093 if (journal->j_flags & JBD2_ABORT) 2094 return; 2095 2096 printk(KERN_ERR "Aborting journal on device %s.\n", 2097 journal->j_devname); 2098 2099 write_lock(&journal->j_state_lock); 2100 journal->j_flags |= JBD2_ABORT; 2101 transaction = journal->j_running_transaction; 2102 if (transaction) 2103 __jbd2_log_start_commit(journal, transaction->t_tid); 2104 write_unlock(&journal->j_state_lock); 2105 } 2106 2107 /* Soft abort: record the abort error status in the journal superblock, 2108 * but don't do any other IO. 
*/ 2109 static void __journal_abort_soft (journal_t *journal, int errno) 2110 { 2111 if (journal->j_flags & JBD2_ABORT) 2112 return; 2113 2114 if (!journal->j_errno) 2115 journal->j_errno = errno; 2116 2117 __jbd2_journal_abort_hard(journal); 2118 2119 if (errno) { 2120 jbd2_journal_update_sb_errno(journal); 2121 write_lock(&journal->j_state_lock); 2122 journal->j_flags |= JBD2_REC_ERR; 2123 write_unlock(&journal->j_state_lock); 2124 } 2125 } 2126 2127 /** 2128 * void jbd2_journal_abort () - Shutdown the journal immediately. 2129 * @journal: the journal to shutdown. 2130 * @errno: an error number to record in the journal indicating 2131 * the reason for the shutdown. 2132 * 2133 * Perform a complete, immediate shutdown of the ENTIRE 2134 * journal (not of a single transaction). This operation cannot be 2135 * undone without closing and reopening the journal. 2136 * 2137 * The jbd2_journal_abort function is intended to support higher level error 2138 * recovery mechanisms such as the ext2/ext3 remount-readonly error 2139 * mode. 2140 * 2141 * Journal abort has very specific semantics. Any existing dirty, 2142 * unjournaled buffers in the main filesystem will still be written to 2143 * disk by bdflush, but the journaling mechanism will be suspended 2144 * immediately and no further transaction commits will be honoured. 2145 * 2146 * Any dirty, journaled buffers will be written back to disk without 2147 * hitting the journal. Atomicity cannot be guaranteed on an aborted 2148 * filesystem, but we _do_ attempt to leave as much data as possible 2149 * behind for fsck to use for cleanup. 2150 * 2151 * Any attempt to get a new transaction handle on a journal which is in 2152 * ABORT state will just result in an -EROFS error return. A 2153 * jbd2_journal_stop on an existing handle will return -EIO if we have 2154 * entered abort state during the update. 
2155 * 2156 * Recursive transactions are not disturbed by journal abort until the 2157 * final jbd2_journal_stop, which will receive the -EIO error. 2158 * 2159 * Finally, the jbd2_journal_abort call allows the caller to supply an errno 2160 * which will be recorded (if possible) in the journal superblock. This 2161 * allows a client to record failure conditions in the middle of a 2162 * transaction without having to complete the transaction to record the 2163 * failure to disk. ext3_error, for example, now uses this 2164 * functionality. 2165 * 2166 * Errors which originate from within the journaling layer will NOT 2167 * supply an errno; a null errno implies that absolutely no further 2168 * writes are done to the journal (unless there are any already in 2169 * progress). 2170 * 2171 */ 2172 2173 void jbd2_journal_abort(journal_t *journal, int errno) 2174 { 2175 __journal_abort_soft(journal, errno); 2176 } 2177 2178 /** 2179 * int jbd2_journal_errno () - returns the journal's error state. 2180 * @journal: journal to examine. 2181 * 2182 * This is the errno number set with jbd2_journal_abort(), the last 2183 * time the journal was mounted - if the journal was stopped 2184 * without calling abort this will be 0. 2185 * 2186 * If the journal has been aborted on this mount time -EROFS will 2187 * be returned. 2188 */ 2189 int jbd2_journal_errno(journal_t *journal) 2190 { 2191 int err; 2192 2193 read_lock(&journal->j_state_lock); 2194 if (journal->j_flags & JBD2_ABORT) 2195 err = -EROFS; 2196 else 2197 err = journal->j_errno; 2198 read_unlock(&journal->j_state_lock); 2199 return err; 2200 } 2201 2202 /** 2203 * int jbd2_journal_clear_err () - clears the journal's error state 2204 * @journal: journal to act on. 2205 * 2206 * An error must be cleared or acked to take a FS out of readonly 2207 * mode. 
2208 */ 2209 int jbd2_journal_clear_err(journal_t *journal) 2210 { 2211 int err = 0; 2212 2213 write_lock(&journal->j_state_lock); 2214 if (journal->j_flags & JBD2_ABORT) 2215 err = -EROFS; 2216 else 2217 journal->j_errno = 0; 2218 write_unlock(&journal->j_state_lock); 2219 return err; 2220 } 2221 2222 /** 2223 * void jbd2_journal_ack_err() - Ack journal err. 2224 * @journal: journal to act on. 2225 * 2226 * An error must be cleared or acked to take a FS out of readonly 2227 * mode. 2228 */ 2229 void jbd2_journal_ack_err(journal_t *journal) 2230 { 2231 write_lock(&journal->j_state_lock); 2232 if (journal->j_errno) 2233 journal->j_flags |= JBD2_ACK_ERR; 2234 write_unlock(&journal->j_state_lock); 2235 } 2236 2237 int jbd2_journal_blocks_per_page(struct inode *inode) 2238 { 2239 return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); 2240 } 2241 2242 /* 2243 * helper functions to deal with 32 or 64bit block numbers. 2244 */ 2245 size_t journal_tag_bytes(journal_t *journal) 2246 { 2247 size_t sz; 2248 2249 if (jbd2_has_feature_csum3(journal)) 2250 return sizeof(journal_block_tag3_t); 2251 2252 sz = sizeof(journal_block_tag_t); 2253 2254 if (jbd2_has_feature_csum2(journal)) 2255 sz += sizeof(__u16); 2256 2257 if (jbd2_has_feature_64bit(journal)) 2258 return sz; 2259 else 2260 return sz - sizeof(__u32); 2261 } 2262 2263 /* 2264 * JBD memory management 2265 * 2266 * These functions are used to allocate block-sized chunks of memory 2267 * used for making copies of buffer_head data. Very often it will be 2268 * page-sized chunks of data, but sometimes it will be in 2269 * sub-page-size chunks. (For example, 16k pages on Power systems 2270 * with a 4k block file system.) For blocks smaller than a page, we 2271 * use a SLAB allocator. There are slab caches for each block size, 2272 * which are allocated at mount time, if necessary, and we only free 2273 * (all of) the slab caches when/if the jbd2 module is unloaded. 
For 2274 * this reason we don't need to a mutex to protect access to 2275 * jbd2_slab[] allocating or releasing memory; only in 2276 * jbd2_journal_create_slab(). 2277 */ 2278 #define JBD2_MAX_SLABS 8 2279 static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; 2280 2281 static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { 2282 "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", 2283 "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" 2284 }; 2285 2286 2287 static void jbd2_journal_destroy_slabs(void) 2288 { 2289 int i; 2290 2291 for (i = 0; i < JBD2_MAX_SLABS; i++) { 2292 if (jbd2_slab[i]) 2293 kmem_cache_destroy(jbd2_slab[i]); 2294 jbd2_slab[i] = NULL; 2295 } 2296 } 2297 2298 static int jbd2_journal_create_slab(size_t size) 2299 { 2300 static DEFINE_MUTEX(jbd2_slab_create_mutex); 2301 int i = order_base_2(size) - 10; 2302 size_t slab_size; 2303 2304 if (size == PAGE_SIZE) 2305 return 0; 2306 2307 if (i >= JBD2_MAX_SLABS) 2308 return -EINVAL; 2309 2310 if (unlikely(i < 0)) 2311 i = 0; 2312 mutex_lock(&jbd2_slab_create_mutex); 2313 if (jbd2_slab[i]) { 2314 mutex_unlock(&jbd2_slab_create_mutex); 2315 return 0; /* Already created */ 2316 } 2317 2318 slab_size = 1 << (i+10); 2319 jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, 2320 slab_size, 0, NULL); 2321 mutex_unlock(&jbd2_slab_create_mutex); 2322 if (!jbd2_slab[i]) { 2323 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); 2324 return -ENOMEM; 2325 } 2326 return 0; 2327 } 2328 2329 static struct kmem_cache *get_slab(size_t size) 2330 { 2331 int i = order_base_2(size) - 10; 2332 2333 BUG_ON(i >= JBD2_MAX_SLABS); 2334 if (unlikely(i < 0)) 2335 i = 0; 2336 BUG_ON(jbd2_slab[i] == NULL); 2337 return jbd2_slab[i]; 2338 } 2339 2340 void *jbd2_alloc(size_t size, gfp_t flags) 2341 { 2342 void *ptr; 2343 2344 BUG_ON(size & (size-1)); /* Must be a power of 2 */ 2345 2346 if (size < PAGE_SIZE) 2347 ptr = kmem_cache_alloc(get_slab(size), flags); 2348 else 2349 ptr = (void *)__get_free_pages(flags, get_order(size)); 
2350 2351 /* Check alignment; SLUB has gotten this wrong in the past, 2352 * and this can lead to user data corruption! */ 2353 BUG_ON(((unsigned long) ptr) & (size-1)); 2354 2355 return ptr; 2356 } 2357 2358 void jbd2_free(void *ptr, size_t size) 2359 { 2360 if (size < PAGE_SIZE) 2361 kmem_cache_free(get_slab(size), ptr); 2362 else 2363 free_pages((unsigned long)ptr, get_order(size)); 2364 }; 2365 2366 /* 2367 * Journal_head storage management 2368 */ 2369 static struct kmem_cache *jbd2_journal_head_cache; 2370 #ifdef CONFIG_JBD2_DEBUG 2371 static atomic_t nr_journal_heads = ATOMIC_INIT(0); 2372 #endif 2373 2374 static int jbd2_journal_init_journal_head_cache(void) 2375 { 2376 int retval; 2377 2378 J_ASSERT(jbd2_journal_head_cache == NULL); 2379 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 2380 sizeof(struct journal_head), 2381 0, /* offset */ 2382 SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, 2383 NULL); /* ctor */ 2384 retval = 0; 2385 if (!jbd2_journal_head_cache) { 2386 retval = -ENOMEM; 2387 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); 2388 } 2389 return retval; 2390 } 2391 2392 static void jbd2_journal_destroy_journal_head_cache(void) 2393 { 2394 if (jbd2_journal_head_cache) { 2395 kmem_cache_destroy(jbd2_journal_head_cache); 2396 jbd2_journal_head_cache = NULL; 2397 } 2398 } 2399 2400 /* 2401 * journal_head splicing and dicing 2402 */ 2403 static struct journal_head *journal_alloc_journal_head(void) 2404 { 2405 struct journal_head *ret; 2406 2407 #ifdef CONFIG_JBD2_DEBUG 2408 atomic_inc(&nr_journal_heads); 2409 #endif 2410 ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); 2411 if (!ret) { 2412 jbd_debug(1, "out of memory for journal_head\n"); 2413 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); 2414 ret = kmem_cache_zalloc(jbd2_journal_head_cache, 2415 GFP_NOFS | __GFP_NOFAIL); 2416 } 2417 return ret; 2418 } 2419 2420 static void journal_free_journal_head(struct journal_head *jh) 2421 { 2422 #ifdef 
CONFIG_JBD2_DEBUG 2423 atomic_dec(&nr_journal_heads); 2424 memset(jh, JBD2_POISON_FREE, sizeof(*jh)); 2425 #endif 2426 kmem_cache_free(jbd2_journal_head_cache, jh); 2427 } 2428 2429 /* 2430 * A journal_head is attached to a buffer_head whenever JBD has an 2431 * interest in the buffer. 2432 * 2433 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit 2434 * is set. This bit is tested in core kernel code where we need to take 2435 * JBD-specific actions. Testing the zeroness of ->b_private is not reliable 2436 * there. 2437 * 2438 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. 2439 * 2440 * When a buffer has its BH_JBD bit set it is immune from being released by 2441 * core kernel code, mainly via ->b_count. 2442 * 2443 * A journal_head is detached from its buffer_head when the journal_head's 2444 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint 2445 * transaction (b_cp_transaction) hold their references to b_jcount. 2446 * 2447 * Various places in the kernel want to attach a journal_head to a buffer_head 2448 * _before_ attaching the journal_head to a transaction. To protect the 2449 * journal_head in this situation, jbd2_journal_add_journal_head elevates the 2450 * journal_head's b_jcount refcount by one. The caller must call 2451 * jbd2_journal_put_journal_head() to undo this. 2452 * 2453 * So the typical usage would be: 2454 * 2455 * (Attach a journal_head if needed. Increments b_jcount) 2456 * struct journal_head *jh = jbd2_journal_add_journal_head(bh); 2457 * ... 2458 * (Get another reference for transaction) 2459 * jbd2_journal_grab_journal_head(bh); 2460 * jh->b_transaction = xxx; 2461 * (Put original reference) 2462 * jbd2_journal_put_journal_head(jh); 2463 */ 2464 2465 /* 2466 * Give a buffer_head a journal_head. 2467 * 2468 * May sleep. 
2469 */ 2470 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) 2471 { 2472 struct journal_head *jh; 2473 struct journal_head *new_jh = NULL; 2474 2475 repeat: 2476 if (!buffer_jbd(bh)) 2477 new_jh = journal_alloc_journal_head(); 2478 2479 jbd_lock_bh_journal_head(bh); 2480 if (buffer_jbd(bh)) { 2481 jh = bh2jh(bh); 2482 } else { 2483 J_ASSERT_BH(bh, 2484 (atomic_read(&bh->b_count) > 0) || 2485 (bh->b_page && bh->b_page->mapping)); 2486 2487 if (!new_jh) { 2488 jbd_unlock_bh_journal_head(bh); 2489 goto repeat; 2490 } 2491 2492 jh = new_jh; 2493 new_jh = NULL; /* We consumed it */ 2494 set_buffer_jbd(bh); 2495 bh->b_private = jh; 2496 jh->b_bh = bh; 2497 get_bh(bh); 2498 BUFFER_TRACE(bh, "added journal_head"); 2499 } 2500 jh->b_jcount++; 2501 jbd_unlock_bh_journal_head(bh); 2502 if (new_jh) 2503 journal_free_journal_head(new_jh); 2504 return bh->b_private; 2505 } 2506 2507 /* 2508 * Grab a ref against this buffer_head's journal_head. If it ended up not 2509 * having a journal_head, return NULL 2510 */ 2511 struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) 2512 { 2513 struct journal_head *jh = NULL; 2514 2515 jbd_lock_bh_journal_head(bh); 2516 if (buffer_jbd(bh)) { 2517 jh = bh2jh(bh); 2518 jh->b_jcount++; 2519 } 2520 jbd_unlock_bh_journal_head(bh); 2521 return jh; 2522 } 2523 2524 static void __journal_remove_journal_head(struct buffer_head *bh) 2525 { 2526 struct journal_head *jh = bh2jh(bh); 2527 2528 J_ASSERT_JH(jh, jh->b_jcount >= 0); 2529 J_ASSERT_JH(jh, jh->b_transaction == NULL); 2530 J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 2531 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 2532 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 2533 J_ASSERT_BH(bh, buffer_jbd(bh)); 2534 J_ASSERT_BH(bh, jh2bh(jh) == bh); 2535 BUFFER_TRACE(bh, "remove journal_head"); 2536 if (jh->b_frozen_data) { 2537 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); 2538 jbd2_free(jh->b_frozen_data, bh->b_size); 2539 } 2540 if 
(jh->b_committed_data) { 2541 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); 2542 jbd2_free(jh->b_committed_data, bh->b_size); 2543 } 2544 bh->b_private = NULL; 2545 jh->b_bh = NULL; /* debug, really */ 2546 clear_buffer_jbd(bh); 2547 journal_free_journal_head(jh); 2548 } 2549 2550 /* 2551 * Drop a reference on the passed journal_head. If it fell to zero then 2552 * release the journal_head from the buffer_head. 2553 */ 2554 void jbd2_journal_put_journal_head(struct journal_head *jh) 2555 { 2556 struct buffer_head *bh = jh2bh(jh); 2557 2558 jbd_lock_bh_journal_head(bh); 2559 J_ASSERT_JH(jh, jh->b_jcount > 0); 2560 --jh->b_jcount; 2561 if (!jh->b_jcount) { 2562 __journal_remove_journal_head(bh); 2563 jbd_unlock_bh_journal_head(bh); 2564 __brelse(bh); 2565 } else 2566 jbd_unlock_bh_journal_head(bh); 2567 } 2568 2569 /* 2570 * Initialize jbd inode head 2571 */ 2572 void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) 2573 { 2574 jinode->i_transaction = NULL; 2575 jinode->i_next_transaction = NULL; 2576 jinode->i_vfs_inode = inode; 2577 jinode->i_flags = 0; 2578 INIT_LIST_HEAD(&jinode->i_list); 2579 } 2580 2581 /* 2582 * Function to be called before we start removing inode from memory (i.e., 2583 * clear_inode() is a fine place to be called from). It removes inode from 2584 * transaction's lists. 
2585 */ 2586 void jbd2_journal_release_jbd_inode(journal_t *journal, 2587 struct jbd2_inode *jinode) 2588 { 2589 if (!journal) 2590 return; 2591 restart: 2592 spin_lock(&journal->j_list_lock); 2593 /* Is commit writing out inode - we have to wait */ 2594 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2595 wait_queue_head_t *wq; 2596 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2597 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2598 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 2599 spin_unlock(&journal->j_list_lock); 2600 schedule(); 2601 finish_wait(wq, &wait.wq_entry); 2602 goto restart; 2603 } 2604 2605 if (jinode->i_transaction) { 2606 list_del(&jinode->i_list); 2607 jinode->i_transaction = NULL; 2608 } 2609 spin_unlock(&journal->j_list_lock); 2610 } 2611 2612 2613 #ifdef CONFIG_PROC_FS 2614 2615 #define JBD2_STATS_PROC_NAME "fs/jbd2" 2616 2617 static void __init jbd2_create_jbd_stats_proc_entry(void) 2618 { 2619 proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL); 2620 } 2621 2622 static void __exit jbd2_remove_jbd_stats_proc_entry(void) 2623 { 2624 if (proc_jbd2_stats) 2625 remove_proc_entry(JBD2_STATS_PROC_NAME, NULL); 2626 } 2627 2628 #else 2629 2630 #define jbd2_create_jbd_stats_proc_entry() do {} while (0) 2631 #define jbd2_remove_jbd_stats_proc_entry() do {} while (0) 2632 2633 #endif 2634 2635 struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; 2636 2637 static int __init jbd2_journal_init_handle_cache(void) 2638 { 2639 jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); 2640 if (jbd2_handle_cache == NULL) { 2641 printk(KERN_EMERG "JBD2: failed to create handle cache\n"); 2642 return -ENOMEM; 2643 } 2644 jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); 2645 if (jbd2_inode_cache == NULL) { 2646 printk(KERN_EMERG "JBD2: failed to create inode cache\n"); 2647 kmem_cache_destroy(jbd2_handle_cache); 2648 return -ENOMEM; 2649 } 2650 return 0; 2651 } 2652 2653 static void 
jbd2_journal_destroy_handle_cache(void) 2654 { 2655 if (jbd2_handle_cache) 2656 kmem_cache_destroy(jbd2_handle_cache); 2657 if (jbd2_inode_cache) 2658 kmem_cache_destroy(jbd2_inode_cache); 2659 2660 } 2661 2662 /* 2663 * Module startup and shutdown 2664 */ 2665 2666 static int __init journal_init_caches(void) 2667 { 2668 int ret; 2669 2670 ret = jbd2_journal_init_revoke_caches(); 2671 if (ret == 0) 2672 ret = jbd2_journal_init_journal_head_cache(); 2673 if (ret == 0) 2674 ret = jbd2_journal_init_handle_cache(); 2675 if (ret == 0) 2676 ret = jbd2_journal_init_transaction_cache(); 2677 return ret; 2678 } 2679 2680 static void jbd2_journal_destroy_caches(void) 2681 { 2682 jbd2_journal_destroy_revoke_caches(); 2683 jbd2_journal_destroy_journal_head_cache(); 2684 jbd2_journal_destroy_handle_cache(); 2685 jbd2_journal_destroy_transaction_cache(); 2686 jbd2_journal_destroy_slabs(); 2687 } 2688 2689 static int __init journal_init(void) 2690 { 2691 int ret; 2692 2693 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2694 2695 ret = journal_init_caches(); 2696 if (ret == 0) { 2697 jbd2_create_jbd_stats_proc_entry(); 2698 } else { 2699 jbd2_journal_destroy_caches(); 2700 } 2701 return ret; 2702 } 2703 2704 static void __exit journal_exit(void) 2705 { 2706 #ifdef CONFIG_JBD2_DEBUG 2707 int n = atomic_read(&nr_journal_heads); 2708 if (n) 2709 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); 2710 #endif 2711 jbd2_remove_jbd_stats_proc_entry(); 2712 jbd2_journal_destroy_caches(); 2713 } 2714 2715 MODULE_LICENSE("GPL"); 2716 module_init(journal_init); 2717 module_exit(journal_exit); 2718 2719