1 /* 2 * linux/fs/jbd2/journal.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5 * 6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Generic filesystem journal-writing code; part of the ext2fs 13 * journaling system. 14 * 15 * This file manages journals: areas of disk reserved for logging 16 * transactional updates. This includes the kernel journaling thread 17 * which is responsible for scheduling updates to the log. 18 * 19 * We do not actually manage the physical storage of the journal in this 20 * file: that is left to a per-journal policy function, which allows us 21 * to store the journal within a filesystem-specified area for ext2 22 * journaling (ext2 can use a reserved inode for storing the log). 23 */ 24 25 #include <linux/module.h> 26 #include <linux/time.h> 27 #include <linux/fs.h> 28 #include <linux/jbd2.h> 29 #include <linux/errno.h> 30 #include <linux/slab.h> 31 #include <linux/init.h> 32 #include <linux/mm.h> 33 #include <linux/freezer.h> 34 #include <linux/pagemap.h> 35 #include <linux/kthread.h> 36 #include <linux/poison.h> 37 #include <linux/proc_fs.h> 38 #include <linux/debugfs.h> 39 #include <linux/seq_file.h> 40 #include <linux/math64.h> 41 #include <linux/hash.h> 42 43 #define CREATE_TRACE_POINTS 44 #include <trace/events/jbd2.h> 45 46 #include <asm/uaccess.h> 47 #include <asm/page.h> 48 49 EXPORT_SYMBOL(jbd2_journal_start); 50 EXPORT_SYMBOL(jbd2_journal_restart); 51 EXPORT_SYMBOL(jbd2_journal_extend); 52 EXPORT_SYMBOL(jbd2_journal_stop); 53 EXPORT_SYMBOL(jbd2_journal_lock_updates); 54 EXPORT_SYMBOL(jbd2_journal_unlock_updates); 55 EXPORT_SYMBOL(jbd2_journal_get_write_access); 56 EXPORT_SYMBOL(jbd2_journal_get_create_access); 57 EXPORT_SYMBOL(jbd2_journal_get_undo_access); 58 
EXPORT_SYMBOL(jbd2_journal_set_triggers); 59 EXPORT_SYMBOL(jbd2_journal_dirty_metadata); 60 EXPORT_SYMBOL(jbd2_journal_release_buffer); 61 EXPORT_SYMBOL(jbd2_journal_forget); 62 #if 0 63 EXPORT_SYMBOL(journal_sync_buffer); 64 #endif 65 EXPORT_SYMBOL(jbd2_journal_flush); 66 EXPORT_SYMBOL(jbd2_journal_revoke); 67 68 EXPORT_SYMBOL(jbd2_journal_init_dev); 69 EXPORT_SYMBOL(jbd2_journal_init_inode); 70 EXPORT_SYMBOL(jbd2_journal_update_format); 71 EXPORT_SYMBOL(jbd2_journal_check_used_features); 72 EXPORT_SYMBOL(jbd2_journal_check_available_features); 73 EXPORT_SYMBOL(jbd2_journal_set_features); 74 EXPORT_SYMBOL(jbd2_journal_load); 75 EXPORT_SYMBOL(jbd2_journal_destroy); 76 EXPORT_SYMBOL(jbd2_journal_abort); 77 EXPORT_SYMBOL(jbd2_journal_errno); 78 EXPORT_SYMBOL(jbd2_journal_ack_err); 79 EXPORT_SYMBOL(jbd2_journal_clear_err); 80 EXPORT_SYMBOL(jbd2_log_wait_commit); 81 EXPORT_SYMBOL(jbd2_log_start_commit); 82 EXPORT_SYMBOL(jbd2_journal_start_commit); 83 EXPORT_SYMBOL(jbd2_journal_force_commit_nested); 84 EXPORT_SYMBOL(jbd2_journal_wipe); 85 EXPORT_SYMBOL(jbd2_journal_blocks_per_page); 86 EXPORT_SYMBOL(jbd2_journal_invalidatepage); 87 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); 88 EXPORT_SYMBOL(jbd2_journal_force_commit); 89 EXPORT_SYMBOL(jbd2_journal_file_inode); 90 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 91 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 92 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 93 94 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 95 static void __journal_abort_soft (journal_t *journal, int errno); 96 97 /* 98 * Helper function used to manage commit timeouts 99 */ 100 101 static void commit_timeout(unsigned long __data) 102 { 103 struct task_struct * p = (struct task_struct *) __data; 104 105 wake_up_process(p); 106 } 107 108 /* 109 * kjournald2: The main thread function used to manage a logging device 110 * journal. 
111 * 112 * This kernel thread is responsible for two things: 113 * 114 * 1) COMMIT: Every so often we need to commit the current state of the 115 * filesystem to disk. The journal thread is responsible for writing 116 * all of the metadata buffers to disk. 117 * 118 * 2) CHECKPOINT: We cannot reuse a used section of the log file until all 119 * of the data in that part of the log has been rewritten elsewhere on 120 * the disk. Flushing these old buffers to reclaim space in the log is 121 * known as checkpointing, and this thread is responsible for that job. 122 */ 123 124 static int kjournald2(void *arg) 125 { 126 journal_t *journal = arg; 127 transaction_t *transaction; 128 129 /* 130 * Set up an interval timer which can be used to trigger a commit wakeup 131 * after the commit interval expires 132 */ 133 setup_timer(&journal->j_commit_timer, commit_timeout, 134 (unsigned long)current); 135 136 /* Record that the journal thread is running */ 137 journal->j_task = current; 138 wake_up(&journal->j_wait_done_commit); 139 140 /* 141 * And now, wait forever for commit wakeup events. 142 */ 143 spin_lock(&journal->j_state_lock); 144 145 loop: 146 if (journal->j_flags & JBD2_UNMOUNT) 147 goto end_loop; 148 149 jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", 150 journal->j_commit_sequence, journal->j_commit_request); 151 152 if (journal->j_commit_sequence != journal->j_commit_request) { 153 jbd_debug(1, "OK, requests differ\n"); 154 spin_unlock(&journal->j_state_lock); 155 del_timer_sync(&journal->j_commit_timer); 156 jbd2_journal_commit_transaction(journal); 157 spin_lock(&journal->j_state_lock); 158 goto loop; 159 } 160 161 wake_up(&journal->j_wait_done_commit); 162 if (freezing(current)) { 163 /* 164 * The simpler the better. Flushing journal isn't a 165 * good idea, because that depends on threads that may 166 * be already stopped. 
167 */ 168 jbd_debug(1, "Now suspending kjournald2\n"); 169 spin_unlock(&journal->j_state_lock); 170 refrigerator(); 171 spin_lock(&journal->j_state_lock); 172 } else { 173 /* 174 * We assume on resume that commits are already there, 175 * so we don't sleep 176 */ 177 DEFINE_WAIT(wait); 178 int should_sleep = 1; 179 180 prepare_to_wait(&journal->j_wait_commit, &wait, 181 TASK_INTERRUPTIBLE); 182 if (journal->j_commit_sequence != journal->j_commit_request) 183 should_sleep = 0; 184 transaction = journal->j_running_transaction; 185 if (transaction && time_after_eq(jiffies, 186 transaction->t_expires)) 187 should_sleep = 0; 188 if (journal->j_flags & JBD2_UNMOUNT) 189 should_sleep = 0; 190 if (should_sleep) { 191 spin_unlock(&journal->j_state_lock); 192 schedule(); 193 spin_lock(&journal->j_state_lock); 194 } 195 finish_wait(&journal->j_wait_commit, &wait); 196 } 197 198 jbd_debug(1, "kjournald2 wakes\n"); 199 200 /* 201 * Were we woken up by a commit wakeup event? 202 */ 203 transaction = journal->j_running_transaction; 204 if (transaction && time_after_eq(jiffies, transaction->t_expires)) { 205 journal->j_commit_request = transaction->t_tid; 206 jbd_debug(1, "woke because of timeout\n"); 207 } 208 goto loop; 209 210 end_loop: 211 spin_unlock(&journal->j_state_lock); 212 del_timer_sync(&journal->j_commit_timer); 213 journal->j_task = NULL; 214 wake_up(&journal->j_wait_done_commit); 215 jbd_debug(1, "Journal thread exiting.\n"); 216 return 0; 217 } 218 219 static int jbd2_journal_start_thread(journal_t *journal) 220 { 221 struct task_struct *t; 222 223 t = kthread_run(kjournald2, journal, "jbd2/%s", 224 journal->j_devname); 225 if (IS_ERR(t)) 226 return PTR_ERR(t); 227 228 wait_event(journal->j_wait_done_commit, journal->j_task != NULL); 229 return 0; 230 } 231 232 static void journal_kill_thread(journal_t *journal) 233 { 234 spin_lock(&journal->j_state_lock); 235 journal->j_flags |= JBD2_UNMOUNT; 236 237 while (journal->j_task) { 238 wake_up(&journal->j_wait_commit); 
239 spin_unlock(&journal->j_state_lock); 240 wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 241 spin_lock(&journal->j_state_lock); 242 } 243 spin_unlock(&journal->j_state_lock); 244 } 245 246 /* 247 * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. 248 * 249 * Writes a metadata buffer to a given disk block. The actual IO is not 250 * performed but a new buffer_head is constructed which labels the data 251 * to be written with the correct destination disk block. 252 * 253 * Any magic-number escaping which needs to be done will cause a 254 * copy-out here. If the buffer happens to start with the 255 * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the 256 * magic number is only written to the log for descripter blocks. In 257 * this case, we copy the data and replace the first word with 0, and we 258 * return a result code which indicates that this buffer needs to be 259 * marked as an escaped buffer in the corresponding log descriptor 260 * block. The missing word can then be restored when the block is read 261 * during recovery. 262 * 263 * If the source buffer has already been modified by a new transaction 264 * since we took the last commit snapshot, we use the frozen copy of 265 * that data for IO. If we end up using the existing buffer_head's data 266 * for the write, then we *have* to lock the buffer to prevent anyone 267 * else from using and possibly modifying it while the IO is in 268 * progress. 269 * 270 * The function returns a pointer to the buffer_heads to be used for IO. 271 * 272 * We assume that the journal has already been locked in this function. 
273 * 274 * Return value: 275 * <0: Error 276 * >=0: Finished OK 277 * 278 * On success: 279 * Bit 0 set == escape performed on the data 280 * Bit 1 set == buffer copy-out performed (kfree the data after IO) 281 */ 282 283 int jbd2_journal_write_metadata_buffer(transaction_t *transaction, 284 struct journal_head *jh_in, 285 struct journal_head **jh_out, 286 unsigned long long blocknr) 287 { 288 int need_copy_out = 0; 289 int done_copy_out = 0; 290 int do_escape = 0; 291 char *mapped_data; 292 struct buffer_head *new_bh; 293 struct journal_head *new_jh; 294 struct page *new_page; 295 unsigned int new_offset; 296 struct buffer_head *bh_in = jh2bh(jh_in); 297 struct jbd2_buffer_trigger_type *triggers; 298 journal_t *journal = transaction->t_journal; 299 300 /* 301 * The buffer really shouldn't be locked: only the current committing 302 * transaction is allowed to write it, so nobody else is allowed 303 * to do any IO. 304 * 305 * akpm: except if we're journalling data, and write() output is 306 * also part of a shared mapping, and another thread has 307 * decided to launch a writepage() against this buffer. 308 */ 309 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 310 311 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 312 /* keep subsequent assertions sane */ 313 new_bh->b_state = 0; 314 init_buffer(new_bh, NULL, NULL); 315 atomic_set(&new_bh->b_count, 1); 316 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ 317 318 /* 319 * If a new transaction has already done a buffer copy-out, then 320 * we use that version of the data for the commit. 
321 */ 322 jbd_lock_bh_state(bh_in); 323 repeat: 324 if (jh_in->b_frozen_data) { 325 done_copy_out = 1; 326 new_page = virt_to_page(jh_in->b_frozen_data); 327 new_offset = offset_in_page(jh_in->b_frozen_data); 328 triggers = jh_in->b_frozen_triggers; 329 } else { 330 new_page = jh2bh(jh_in)->b_page; 331 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 332 triggers = jh_in->b_triggers; 333 } 334 335 mapped_data = kmap_atomic(new_page, KM_USER0); 336 /* 337 * Fire any commit trigger. Do this before checking for escaping, 338 * as the trigger may modify the magic offset. If a copy-out 339 * happens afterwards, it will have the correct data in the buffer. 340 */ 341 jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, 342 triggers); 343 344 /* 345 * Check for escaping 346 */ 347 if (*((__be32 *)(mapped_data + new_offset)) == 348 cpu_to_be32(JBD2_MAGIC_NUMBER)) { 349 need_copy_out = 1; 350 do_escape = 1; 351 } 352 kunmap_atomic(mapped_data, KM_USER0); 353 354 /* 355 * Do we need to do a data copy? 356 */ 357 if (need_copy_out && !done_copy_out) { 358 char *tmp; 359 360 jbd_unlock_bh_state(bh_in); 361 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 362 if (!tmp) { 363 jbd2_journal_put_journal_head(new_jh); 364 return -ENOMEM; 365 } 366 jbd_lock_bh_state(bh_in); 367 if (jh_in->b_frozen_data) { 368 jbd2_free(tmp, bh_in->b_size); 369 goto repeat; 370 } 371 372 jh_in->b_frozen_data = tmp; 373 mapped_data = kmap_atomic(new_page, KM_USER0); 374 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 375 kunmap_atomic(mapped_data, KM_USER0); 376 377 new_page = virt_to_page(tmp); 378 new_offset = offset_in_page(tmp); 379 done_copy_out = 1; 380 381 /* 382 * This isn't strictly necessary, as we're using frozen 383 * data for the escaping, but it keeps consistency with 384 * b_frozen_data usage. 385 */ 386 jh_in->b_frozen_triggers = jh_in->b_triggers; 387 } 388 389 /* 390 * Did we need to do an escaping? Now we've done all the 391 * copying, we can finally do so. 
392 */ 393 if (do_escape) { 394 mapped_data = kmap_atomic(new_page, KM_USER0); 395 *((unsigned int *)(mapped_data + new_offset)) = 0; 396 kunmap_atomic(mapped_data, KM_USER0); 397 } 398 399 set_bh_page(new_bh, new_page, new_offset); 400 new_jh->b_transaction = NULL; 401 new_bh->b_size = jh2bh(jh_in)->b_size; 402 new_bh->b_bdev = transaction->t_journal->j_dev; 403 new_bh->b_blocknr = blocknr; 404 set_buffer_mapped(new_bh); 405 set_buffer_dirty(new_bh); 406 407 *jh_out = new_jh; 408 409 /* 410 * The to-be-written buffer needs to get moved to the io queue, 411 * and the original buffer whose contents we are shadowing or 412 * copying is moved to the transaction's shadow queue. 413 */ 414 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 415 spin_lock(&journal->j_list_lock); 416 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 417 spin_unlock(&journal->j_list_lock); 418 jbd_unlock_bh_state(bh_in); 419 420 JBUFFER_TRACE(new_jh, "file as BJ_IO"); 421 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); 422 423 return do_escape | (done_copy_out << 1); 424 } 425 426 /* 427 * Allocation code for the journal file. Manage the space left in the 428 * journal, so that we can begin checkpointing when appropriate. 429 */ 430 431 /* 432 * __jbd2_log_space_left: Return the number of free blocks left in the journal. 433 * 434 * Called with the journal already locked. 435 * 436 * Called under j_state_lock 437 */ 438 439 int __jbd2_log_space_left(journal_t *journal) 440 { 441 int left = journal->j_free; 442 443 assert_spin_locked(&journal->j_state_lock); 444 445 /* 446 * Be pessimistic here about the number of those free blocks which 447 * might be required for log descriptor control blocks. 448 */ 449 450 #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ 451 452 left -= MIN_LOG_RESERVED_BLOCKS; 453 454 if (left <= 0) 455 return 0; 456 left -= (left >> 3); 457 return left; 458 } 459 460 /* 461 * Called under j_state_lock. 
Returns true if a transaction commit was started. 462 */ 463 int __jbd2_log_start_commit(journal_t *journal, tid_t target) 464 { 465 /* 466 * Are we already doing a recent enough commit? 467 */ 468 if (!tid_geq(journal->j_commit_request, target)) { 469 /* 470 * We want a new commit: OK, mark the request and wakup the 471 * commit thread. We do _not_ do the commit ourselves. 472 */ 473 474 journal->j_commit_request = target; 475 jbd_debug(1, "JBD: requesting commit %d/%d\n", 476 journal->j_commit_request, 477 journal->j_commit_sequence); 478 wake_up(&journal->j_wait_commit); 479 return 1; 480 } 481 return 0; 482 } 483 484 int jbd2_log_start_commit(journal_t *journal, tid_t tid) 485 { 486 int ret; 487 488 spin_lock(&journal->j_state_lock); 489 ret = __jbd2_log_start_commit(journal, tid); 490 spin_unlock(&journal->j_state_lock); 491 return ret; 492 } 493 494 /* 495 * Force and wait upon a commit if the calling process is not within 496 * transaction. This is used for forcing out undo-protected data which contains 497 * bitmaps, when the fs is running out of space. 498 * 499 * We can only force the running transaction if we don't have an active handle; 500 * otherwise, we will deadlock. 501 * 502 * Returns true if a transaction was started. 
503 */ 504 int jbd2_journal_force_commit_nested(journal_t *journal) 505 { 506 transaction_t *transaction = NULL; 507 tid_t tid; 508 509 spin_lock(&journal->j_state_lock); 510 if (journal->j_running_transaction && !current->journal_info) { 511 transaction = journal->j_running_transaction; 512 __jbd2_log_start_commit(journal, transaction->t_tid); 513 } else if (journal->j_committing_transaction) 514 transaction = journal->j_committing_transaction; 515 516 if (!transaction) { 517 spin_unlock(&journal->j_state_lock); 518 return 0; /* Nothing to retry */ 519 } 520 521 tid = transaction->t_tid; 522 spin_unlock(&journal->j_state_lock); 523 jbd2_log_wait_commit(journal, tid); 524 return 1; 525 } 526 527 /* 528 * Start a commit of the current running transaction (if any). Returns true 529 * if a transaction is going to be committed (or is currently already 530 * committing), and fills its tid in at *ptid 531 */ 532 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 533 { 534 int ret = 0; 535 536 spin_lock(&journal->j_state_lock); 537 if (journal->j_running_transaction) { 538 tid_t tid = journal->j_running_transaction->t_tid; 539 540 __jbd2_log_start_commit(journal, tid); 541 /* There's a running transaction and we've just made sure 542 * it's commit has been scheduled. */ 543 if (ptid) 544 *ptid = tid; 545 ret = 1; 546 } else if (journal->j_committing_transaction) { 547 /* 548 * If ext3_write_super() recently started a commit, then we 549 * have to wait for completion of that transaction 550 */ 551 if (ptid) 552 *ptid = journal->j_committing_transaction->t_tid; 553 ret = 1; 554 } 555 spin_unlock(&journal->j_state_lock); 556 return ret; 557 } 558 559 /* 560 * Wait for a specified commit to complete. 561 * The caller may not hold the journal lock. 
562 */ 563 int jbd2_log_wait_commit(journal_t *journal, tid_t tid) 564 { 565 int err = 0; 566 567 #ifdef CONFIG_JBD2_DEBUG 568 spin_lock(&journal->j_state_lock); 569 if (!tid_geq(journal->j_commit_request, tid)) { 570 printk(KERN_EMERG 571 "%s: error: j_commit_request=%d, tid=%d\n", 572 __func__, journal->j_commit_request, tid); 573 } 574 spin_unlock(&journal->j_state_lock); 575 #endif 576 spin_lock(&journal->j_state_lock); 577 while (tid_gt(tid, journal->j_commit_sequence)) { 578 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 579 tid, journal->j_commit_sequence); 580 wake_up(&journal->j_wait_commit); 581 spin_unlock(&journal->j_state_lock); 582 wait_event(journal->j_wait_done_commit, 583 !tid_gt(tid, journal->j_commit_sequence)); 584 spin_lock(&journal->j_state_lock); 585 } 586 spin_unlock(&journal->j_state_lock); 587 588 if (unlikely(is_journal_aborted(journal))) { 589 printk(KERN_EMERG "journal commit I/O error\n"); 590 err = -EIO; 591 } 592 return err; 593 } 594 595 /* 596 * Log buffer allocation routines: 597 */ 598 599 int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) 600 { 601 unsigned long blocknr; 602 603 spin_lock(&journal->j_state_lock); 604 J_ASSERT(journal->j_free > 1); 605 606 blocknr = journal->j_head; 607 journal->j_head++; 608 journal->j_free--; 609 if (journal->j_head == journal->j_last) 610 journal->j_head = journal->j_first; 611 spin_unlock(&journal->j_state_lock); 612 return jbd2_journal_bmap(journal, blocknr, retp); 613 } 614 615 /* 616 * Conversion of logical to physical block numbers for the journal 617 * 618 * On external journals the journal blocks are identity-mapped, so 619 * this is a no-op. If needed, we can use j_blk_offset - everything is 620 * ready. 
621 */ 622 int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, 623 unsigned long long *retp) 624 { 625 int err = 0; 626 unsigned long long ret; 627 628 if (journal->j_inode) { 629 ret = bmap(journal->j_inode, blocknr); 630 if (ret) 631 *retp = ret; 632 else { 633 printk(KERN_ALERT "%s: journal block not found " 634 "at offset %lu on %s\n", 635 __func__, blocknr, journal->j_devname); 636 err = -EIO; 637 __journal_abort_soft(journal, err); 638 } 639 } else { 640 *retp = blocknr; /* +journal->j_blk_offset */ 641 } 642 return err; 643 } 644 645 /* 646 * We play buffer_head aliasing tricks to write data/metadata blocks to 647 * the journal without copying their contents, but for journal 648 * descriptor blocks we do need to generate bona fide buffers. 649 * 650 * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying 651 * the buffer's contents they really should run flush_dcache_page(bh->b_page). 652 * But we don't bother doing that, so there will be coherency problems with 653 * mmaps of blockdevs which hold live JBD-controlled filesystems. 654 */ 655 struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) 656 { 657 struct buffer_head *bh; 658 unsigned long long blocknr; 659 int err; 660 661 err = jbd2_journal_next_log_block(journal, &blocknr); 662 663 if (err) 664 return NULL; 665 666 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 667 if (!bh) 668 return NULL; 669 lock_buffer(bh); 670 memset(bh->b_data, 0, journal->j_blocksize); 671 set_buffer_uptodate(bh); 672 unlock_buffer(bh); 673 BUFFER_TRACE(bh, "return this buffer"); 674 return jbd2_journal_add_journal_head(bh); 675 } 676 677 struct jbd2_stats_proc_session { 678 journal_t *journal; 679 struct transaction_stats_s *stats; 680 int start; 681 int max; 682 }; 683 684 static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) 685 { 686 return *pos ? 
NULL : SEQ_START_TOKEN; 687 } 688 689 static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) 690 { 691 return NULL; 692 } 693 694 static int jbd2_seq_info_show(struct seq_file *seq, void *v) 695 { 696 struct jbd2_stats_proc_session *s = seq->private; 697 698 if (v != SEQ_START_TOKEN) 699 return 0; 700 seq_printf(seq, "%lu transaction, each up to %u blocks\n", 701 s->stats->ts_tid, 702 s->journal->j_max_transaction_buffers); 703 if (s->stats->ts_tid == 0) 704 return 0; 705 seq_printf(seq, "average: \n %ums waiting for transaction\n", 706 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 707 seq_printf(seq, " %ums running transaction\n", 708 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 709 seq_printf(seq, " %ums transaction was being locked\n", 710 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid)); 711 seq_printf(seq, " %ums flushing data (in ordered mode)\n", 712 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid)); 713 seq_printf(seq, " %ums logging transaction\n", 714 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid)); 715 seq_printf(seq, " %lluus average transaction commit time\n", 716 div_u64(s->journal->j_average_commit_time, 1000)); 717 seq_printf(seq, " %lu handles per transaction\n", 718 s->stats->run.rs_handle_count / s->stats->ts_tid); 719 seq_printf(seq, " %lu blocks per transaction\n", 720 s->stats->run.rs_blocks / s->stats->ts_tid); 721 seq_printf(seq, " %lu logged blocks per transaction\n", 722 s->stats->run.rs_blocks_logged / s->stats->ts_tid); 723 return 0; 724 } 725 726 static void jbd2_seq_info_stop(struct seq_file *seq, void *v) 727 { 728 } 729 730 static const struct seq_operations jbd2_seq_info_ops = { 731 .start = jbd2_seq_info_start, 732 .next = jbd2_seq_info_next, 733 .stop = jbd2_seq_info_stop, 734 .show = jbd2_seq_info_show, 735 }; 736 737 static int jbd2_seq_info_open(struct inode *inode, struct file *file) 738 { 739 journal_t *journal = PDE(inode)->data; 
740 struct jbd2_stats_proc_session *s; 741 int rc, size; 742 743 s = kmalloc(sizeof(*s), GFP_KERNEL); 744 if (s == NULL) 745 return -ENOMEM; 746 size = sizeof(struct transaction_stats_s); 747 s->stats = kmalloc(size, GFP_KERNEL); 748 if (s->stats == NULL) { 749 kfree(s); 750 return -ENOMEM; 751 } 752 spin_lock(&journal->j_history_lock); 753 memcpy(s->stats, &journal->j_stats, size); 754 s->journal = journal; 755 spin_unlock(&journal->j_history_lock); 756 757 rc = seq_open(file, &jbd2_seq_info_ops); 758 if (rc == 0) { 759 struct seq_file *m = file->private_data; 760 m->private = s; 761 } else { 762 kfree(s->stats); 763 kfree(s); 764 } 765 return rc; 766 767 } 768 769 static int jbd2_seq_info_release(struct inode *inode, struct file *file) 770 { 771 struct seq_file *seq = file->private_data; 772 struct jbd2_stats_proc_session *s = seq->private; 773 kfree(s->stats); 774 kfree(s); 775 return seq_release(inode, file); 776 } 777 778 static const struct file_operations jbd2_seq_info_fops = { 779 .owner = THIS_MODULE, 780 .open = jbd2_seq_info_open, 781 .read = seq_read, 782 .llseek = seq_lseek, 783 .release = jbd2_seq_info_release, 784 }; 785 786 static struct proc_dir_entry *proc_jbd2_stats; 787 788 static void jbd2_stats_proc_init(journal_t *journal) 789 { 790 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); 791 if (journal->j_proc_entry) { 792 proc_create_data("info", S_IRUGO, journal->j_proc_entry, 793 &jbd2_seq_info_fops, journal); 794 } 795 } 796 797 static void jbd2_stats_proc_exit(journal_t *journal) 798 { 799 remove_proc_entry("info", journal->j_proc_entry); 800 remove_proc_entry(journal->j_devname, proc_jbd2_stats); 801 } 802 803 /* 804 * Management for journal control blocks: functions to create and 805 * destroy journal_t structures, and to initialise and read existing 806 * journal blocks from disk. */ 807 808 /* First: create and setup a journal_t object in memory. 
We initialise 809 * very few fields yet: that has to wait until we have created the 810 * journal structures from from scratch, or loaded them from disk. */ 811 812 static journal_t * journal_init_common (void) 813 { 814 journal_t *journal; 815 int err; 816 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL); 818 if (!journal) 819 goto fail; 820 821 init_waitqueue_head(&journal->j_wait_transaction_locked); 822 init_waitqueue_head(&journal->j_wait_logspace); 823 init_waitqueue_head(&journal->j_wait_done_commit); 824 init_waitqueue_head(&journal->j_wait_checkpoint); 825 init_waitqueue_head(&journal->j_wait_commit); 826 init_waitqueue_head(&journal->j_wait_updates); 827 mutex_init(&journal->j_barrier); 828 mutex_init(&journal->j_checkpoint_mutex); 829 spin_lock_init(&journal->j_revoke_lock); 830 spin_lock_init(&journal->j_list_lock); 831 spin_lock_init(&journal->j_state_lock); 832 833 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 834 journal->j_min_batch_time = 0; 835 journal->j_max_batch_time = 15000; /* 15ms */ 836 837 /* The journal is marked for error until we succeed with recovery! */ 838 journal->j_flags = JBD2_ABORT; 839 840 /* Set up a default-sized revoke table for the new mount. */ 841 err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 842 if (err) { 843 kfree(journal); 844 goto fail; 845 } 846 847 spin_lock_init(&journal->j_history_lock); 848 849 return journal; 850 fail: 851 return NULL; 852 } 853 854 /* jbd2_journal_init_dev and jbd2_journal_init_inode: 855 * 856 * Create a journal structure assigned some fixed set of disk blocks to 857 * the journal. We don't actually touch those disk blocks yet, but we 858 * need to set up all of the mapping information to tell the journaling 859 * system where the journal blocks are. 
860 * 861 */ 862 863 /** 864 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure 865 * @bdev: Block device on which to create the journal 866 * @fs_dev: Device which hold journalled filesystem for this journal. 867 * @start: Block nr Start of journal. 868 * @len: Length of the journal in blocks. 869 * @blocksize: blocksize of journalling device 870 * 871 * Returns: a newly created journal_t * 872 * 873 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 874 * range of blocks on an arbitrary block device. 875 * 876 */ 877 journal_t * jbd2_journal_init_dev(struct block_device *bdev, 878 struct block_device *fs_dev, 879 unsigned long long start, int len, int blocksize) 880 { 881 journal_t *journal = journal_init_common(); 882 struct buffer_head *bh; 883 char *p; 884 int n; 885 886 if (!journal) 887 return NULL; 888 889 /* journal descriptor can store up to n blocks -bzzz */ 890 journal->j_blocksize = blocksize; 891 jbd2_stats_proc_init(journal); 892 n = journal->j_blocksize / sizeof(journal_block_tag_t); 893 journal->j_wbufsize = n; 894 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 895 if (!journal->j_wbuf) { 896 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 897 __func__); 898 goto out_err; 899 } 900 journal->j_dev = bdev; 901 journal->j_fs_dev = fs_dev; 902 journal->j_blk_offset = start; 903 journal->j_maxlen = len; 904 bdevname(journal->j_dev, journal->j_devname); 905 p = journal->j_devname; 906 while ((p = strchr(p, '/'))) 907 *p = '!'; 908 909 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 910 if (!bh) { 911 printk(KERN_ERR 912 "%s: Cannot get buffer for journal superblock\n", 913 __func__); 914 goto out_err; 915 } 916 journal->j_sb_buffer = bh; 917 journal->j_superblock = (journal_superblock_t *)bh->b_data; 918 919 return journal; 920 out_err: 921 kfree(journal->j_wbuf); 922 jbd2_stats_proc_exit(journal); 923 kfree(journal); 924 return NULL; 925 } 926 927 /** 
928 * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. 929 * @inode: An inode to create the journal in 930 * 931 * jbd2_journal_init_inode creates a journal which maps an on-disk inode as 932 * the journal. The inode must exist already, must support bmap() and 933 * must have all data blocks preallocated. 934 */ 935 journal_t * jbd2_journal_init_inode (struct inode *inode) 936 { 937 struct buffer_head *bh; 938 journal_t *journal = journal_init_common(); 939 char *p; 940 int err; 941 int n; 942 unsigned long long blocknr; 943 944 if (!journal) 945 return NULL; 946 947 journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; 948 journal->j_inode = inode; 949 bdevname(journal->j_dev, journal->j_devname); 950 p = journal->j_devname; 951 while ((p = strchr(p, '/'))) 952 *p = '!'; 953 p = journal->j_devname + strlen(journal->j_devname); 954 sprintf(p, "-%lu", journal->j_inode->i_ino); 955 jbd_debug(1, 956 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 957 journal, inode->i_sb->s_id, inode->i_ino, 958 (long long) inode->i_size, 959 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 960 961 journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; 962 journal->j_blocksize = inode->i_sb->s_blocksize; 963 jbd2_stats_proc_init(journal); 964 965 /* journal descriptor can store up to n blocks -bzzz */ 966 n = journal->j_blocksize / sizeof(journal_block_tag_t); 967 journal->j_wbufsize = n; 968 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 969 if (!journal->j_wbuf) { 970 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 971 __func__); 972 goto out_err; 973 } 974 975 err = jbd2_journal_bmap(journal, 0, &blocknr); 976 /* If that failed, give up */ 977 if (err) { 978 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 979 __func__); 980 goto out_err; 981 } 982 983 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 984 if (!bh) { 985 printk(KERN_ERR 986 "%s: Cannot 
get buffer for journal superblock\n", 987 __func__); 988 goto out_err; 989 } 990 journal->j_sb_buffer = bh; 991 journal->j_superblock = (journal_superblock_t *)bh->b_data; 992 993 return journal; 994 out_err: 995 kfree(journal->j_wbuf); 996 jbd2_stats_proc_exit(journal); 997 kfree(journal); 998 return NULL; 999 } 1000 1001 /* 1002 * If the journal init or create aborts, we need to mark the journal 1003 * superblock as being NULL to prevent the journal destroy from writing 1004 * back a bogus superblock. 1005 */ 1006 static void journal_fail_superblock (journal_t *journal) 1007 { 1008 struct buffer_head *bh = journal->j_sb_buffer; 1009 brelse(bh); 1010 journal->j_sb_buffer = NULL; 1011 } 1012 1013 /* 1014 * Given a journal_t structure, initialise the various fields for 1015 * startup of a new journaling session. We use this both when creating 1016 * a journal, and after recovering an old journal to reset it for 1017 * subsequent use. 1018 */ 1019 1020 static int journal_reset(journal_t *journal) 1021 { 1022 journal_superblock_t *sb = journal->j_superblock; 1023 unsigned long long first, last; 1024 1025 first = be32_to_cpu(sb->s_first); 1026 last = be32_to_cpu(sb->s_maxlen); 1027 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1028 printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", 1029 first, last); 1030 journal_fail_superblock(journal); 1031 return -EINVAL; 1032 } 1033 1034 journal->j_first = first; 1035 journal->j_last = last; 1036 1037 journal->j_head = first; 1038 journal->j_tail = first; 1039 journal->j_free = last - first; 1040 1041 journal->j_tail_sequence = journal->j_transaction_sequence; 1042 journal->j_commit_sequence = journal->j_transaction_sequence - 1; 1043 journal->j_commit_request = journal->j_commit_sequence; 1044 1045 journal->j_max_transaction_buffers = journal->j_maxlen / 4; 1046 1047 /* Add the dynamic fields and write it to disk. 
*/ 1048 jbd2_journal_update_superblock(journal, 1); 1049 return jbd2_journal_start_thread(journal); 1050 } 1051 1052 /** 1053 * void jbd2_journal_update_superblock() - Update journal sb on disk. 1054 * @journal: The journal to update. 1055 * @wait: Set to '0' if you don't want to wait for IO completion. 1056 * 1057 * Update a journal's dynamic superblock fields and write it to disk, 1058 * optionally waiting for the IO to complete. 1059 */ 1060 void jbd2_journal_update_superblock(journal_t *journal, int wait) 1061 { 1062 journal_superblock_t *sb = journal->j_superblock; 1063 struct buffer_head *bh = journal->j_sb_buffer; 1064 1065 /* 1066 * As a special case, if the on-disk copy is already marked as needing 1067 * no recovery (s_start == 0) and there are no outstanding transactions 1068 * in the filesystem, then we can safely defer the superblock update 1069 * until the next commit by setting JBD2_FLUSHED. This avoids 1070 * attempting a write to a potential-readonly device. 1071 */ 1072 if (sb->s_start == 0 && journal->j_tail_sequence == 1073 journal->j_transaction_sequence) { 1074 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 1075 "(start %ld, seq %d, errno %d)\n", 1076 journal->j_tail, journal->j_tail_sequence, 1077 journal->j_errno); 1078 goto out; 1079 } 1080 1081 if (buffer_write_io_error(bh)) { 1082 /* 1083 * Oh, dear. A previous attempt to write the journal 1084 * superblock failed. This could happen because the 1085 * USB device was yanked out. Or it could happen to 1086 * be a transient write error and maybe the block will 1087 * be remapped. Nothing we can do but to retry the 1088 * write and hope for the best. 
1089 */ 1090 printk(KERN_ERR "JBD2: previous I/O error detected " 1091 "for journal superblock update for %s.\n", 1092 journal->j_devname); 1093 clear_buffer_write_io_error(bh); 1094 set_buffer_uptodate(bh); 1095 } 1096 1097 spin_lock(&journal->j_state_lock); 1098 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1099 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1100 1101 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1102 sb->s_start = cpu_to_be32(journal->j_tail); 1103 sb->s_errno = cpu_to_be32(journal->j_errno); 1104 spin_unlock(&journal->j_state_lock); 1105 1106 BUFFER_TRACE(bh, "marking dirty"); 1107 mark_buffer_dirty(bh); 1108 if (wait) { 1109 sync_dirty_buffer(bh); 1110 if (buffer_write_io_error(bh)) { 1111 printk(KERN_ERR "JBD2: I/O error detected " 1112 "when updating journal superblock for %s.\n", 1113 journal->j_devname); 1114 clear_buffer_write_io_error(bh); 1115 set_buffer_uptodate(bh); 1116 } 1117 } else 1118 ll_rw_block(SWRITE, 1, &bh); 1119 1120 out: 1121 /* If we have just flushed the log (by marking s_start==0), then 1122 * any future commit will have to be careful to update the 1123 * superblock again to re-record the true start of the log. */ 1124 1125 spin_lock(&journal->j_state_lock); 1126 if (sb->s_start) 1127 journal->j_flags &= ~JBD2_FLUSHED; 1128 else 1129 journal->j_flags |= JBD2_FLUSHED; 1130 spin_unlock(&journal->j_state_lock); 1131 } 1132 1133 /* 1134 * Read the superblock for a given journal, performing initial 1135 * validation of the format. 
1136 */ 1137 1138 static int journal_get_superblock(journal_t *journal) 1139 { 1140 struct buffer_head *bh; 1141 journal_superblock_t *sb; 1142 int err = -EIO; 1143 1144 bh = journal->j_sb_buffer; 1145 1146 J_ASSERT(bh != NULL); 1147 if (!buffer_uptodate(bh)) { 1148 ll_rw_block(READ, 1, &bh); 1149 wait_on_buffer(bh); 1150 if (!buffer_uptodate(bh)) { 1151 printk (KERN_ERR 1152 "JBD: IO error reading journal superblock\n"); 1153 goto out; 1154 } 1155 } 1156 1157 sb = journal->j_superblock; 1158 1159 err = -EINVAL; 1160 1161 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1162 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1163 printk(KERN_WARNING "JBD: no valid journal superblock found\n"); 1164 goto out; 1165 } 1166 1167 switch(be32_to_cpu(sb->s_header.h_blocktype)) { 1168 case JBD2_SUPERBLOCK_V1: 1169 journal->j_format_version = 1; 1170 break; 1171 case JBD2_SUPERBLOCK_V2: 1172 journal->j_format_version = 2; 1173 break; 1174 default: 1175 printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); 1176 goto out; 1177 } 1178 1179 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1180 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1181 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1182 printk (KERN_WARNING "JBD: journal file too short\n"); 1183 goto out; 1184 } 1185 1186 return 0; 1187 1188 out: 1189 journal_fail_superblock(journal); 1190 return err; 1191 } 1192 1193 /* 1194 * Load the on-disk journal superblock and read the key fields into the 1195 * journal_t. 
1196 */ 1197 1198 static int load_superblock(journal_t *journal) 1199 { 1200 int err; 1201 journal_superblock_t *sb; 1202 1203 err = journal_get_superblock(journal); 1204 if (err) 1205 return err; 1206 1207 sb = journal->j_superblock; 1208 1209 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 1210 journal->j_tail = be32_to_cpu(sb->s_start); 1211 journal->j_first = be32_to_cpu(sb->s_first); 1212 journal->j_last = be32_to_cpu(sb->s_maxlen); 1213 journal->j_errno = be32_to_cpu(sb->s_errno); 1214 1215 return 0; 1216 } 1217 1218 1219 /** 1220 * int jbd2_journal_load() - Read journal from disk. 1221 * @journal: Journal to act on. 1222 * 1223 * Given a journal_t structure which tells us which disk blocks contain 1224 * a journal, read the journal from disk to initialise the in-memory 1225 * structures. 1226 */ 1227 int jbd2_journal_load(journal_t *journal) 1228 { 1229 int err; 1230 journal_superblock_t *sb; 1231 1232 err = load_superblock(journal); 1233 if (err) 1234 return err; 1235 1236 sb = journal->j_superblock; 1237 /* If this is a V2 superblock, then we have to check the 1238 * features flags on it. */ 1239 1240 if (journal->j_format_version >= 2) { 1241 if ((sb->s_feature_ro_compat & 1242 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1243 (sb->s_feature_incompat & 1244 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1245 printk (KERN_WARNING 1246 "JBD: Unrecognised features on journal\n"); 1247 return -EINVAL; 1248 } 1249 } 1250 1251 /* Let the recovery code check whether it needs to recover any 1252 * data from the journal. */ 1253 if (jbd2_journal_recover(journal)) 1254 goto recovery_error; 1255 1256 if (journal->j_failed_commit) { 1257 printk(KERN_ERR "JBD2: journal transaction %u on %s " 1258 "is corrupt.\n", journal->j_failed_commit, 1259 journal->j_devname); 1260 return -EIO; 1261 } 1262 1263 /* OK, we've finished with the dynamic journal bits: 1264 * reinitialise the dynamic contents of the superblock in memory 1265 * and reset them on disk. 
*/ 1266 if (journal_reset(journal)) 1267 goto recovery_error; 1268 1269 journal->j_flags &= ~JBD2_ABORT; 1270 journal->j_flags |= JBD2_LOADED; 1271 return 0; 1272 1273 recovery_error: 1274 printk (KERN_WARNING "JBD: recovery failed\n"); 1275 return -EIO; 1276 } 1277 1278 /** 1279 * void jbd2_journal_destroy() - Release a journal_t structure. 1280 * @journal: Journal to act on. 1281 * 1282 * Release a journal_t structure once it is no longer in use by the 1283 * journaled object. 1284 * Return <0 if we couldn't clean up the journal. 1285 */ 1286 int jbd2_journal_destroy(journal_t *journal) 1287 { 1288 int err = 0; 1289 1290 /* Wait for the commit thread to wake up and die. */ 1291 journal_kill_thread(journal); 1292 1293 /* Force a final log commit */ 1294 if (journal->j_running_transaction) 1295 jbd2_journal_commit_transaction(journal); 1296 1297 /* Force any old transactions to disk */ 1298 1299 /* Totally anal locking here... */ 1300 spin_lock(&journal->j_list_lock); 1301 while (journal->j_checkpoint_transactions != NULL) { 1302 spin_unlock(&journal->j_list_lock); 1303 mutex_lock(&journal->j_checkpoint_mutex); 1304 jbd2_log_do_checkpoint(journal); 1305 mutex_unlock(&journal->j_checkpoint_mutex); 1306 spin_lock(&journal->j_list_lock); 1307 } 1308 1309 J_ASSERT(journal->j_running_transaction == NULL); 1310 J_ASSERT(journal->j_committing_transaction == NULL); 1311 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1312 spin_unlock(&journal->j_list_lock); 1313 1314 if (journal->j_sb_buffer) { 1315 if (!is_journal_aborted(journal)) { 1316 /* We can now mark the journal as empty. 
*/ 1317 journal->j_tail = 0; 1318 journal->j_tail_sequence = 1319 ++journal->j_transaction_sequence; 1320 jbd2_journal_update_superblock(journal, 1); 1321 } else { 1322 err = -EIO; 1323 } 1324 brelse(journal->j_sb_buffer); 1325 } 1326 1327 if (journal->j_proc_entry) 1328 jbd2_stats_proc_exit(journal); 1329 if (journal->j_inode) 1330 iput(journal->j_inode); 1331 if (journal->j_revoke) 1332 jbd2_journal_destroy_revoke(journal); 1333 kfree(journal->j_wbuf); 1334 kfree(journal); 1335 1336 return err; 1337 } 1338 1339 1340 /** 1341 *int jbd2_journal_check_used_features () - Check if features specified are used. 1342 * @journal: Journal to check. 1343 * @compat: bitmask of compatible features 1344 * @ro: bitmask of features that force read-only mount 1345 * @incompat: bitmask of incompatible features 1346 * 1347 * Check whether the journal uses all of a given set of 1348 * features. Return true (non-zero) if it does. 1349 **/ 1350 1351 int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, 1352 unsigned long ro, unsigned long incompat) 1353 { 1354 journal_superblock_t *sb; 1355 1356 if (!compat && !ro && !incompat) 1357 return 1; 1358 if (journal->j_format_version == 1) 1359 return 0; 1360 1361 sb = journal->j_superblock; 1362 1363 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && 1364 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && 1365 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) 1366 return 1; 1367 1368 return 0; 1369 } 1370 1371 /** 1372 * int jbd2_journal_check_available_features() - Check feature set in journalling layer 1373 * @journal: Journal to check. 1374 * @compat: bitmask of compatible features 1375 * @ro: bitmask of features that force read-only mount 1376 * @incompat: bitmask of incompatible features 1377 * 1378 * Check whether the journaling code supports the use of 1379 * all of a given set of features on this journal. Return true 1380 * (non-zero) if it can. 
*/ 1381 1382 int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, 1383 unsigned long ro, unsigned long incompat) 1384 { 1385 journal_superblock_t *sb; 1386 1387 if (!compat && !ro && !incompat) 1388 return 1; 1389 1390 sb = journal->j_superblock; 1391 1392 /* We can support any known requested features iff the 1393 * superblock is in version 2. Otherwise we fail to support any 1394 * extended sb features. */ 1395 1396 if (journal->j_format_version != 2) 1397 return 0; 1398 1399 if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && 1400 (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && 1401 (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) 1402 return 1; 1403 1404 return 0; 1405 } 1406 1407 /** 1408 * int jbd2_journal_set_features () - Mark a given journal feature in the superblock 1409 * @journal: Journal to act on. 1410 * @compat: bitmask of compatible features 1411 * @ro: bitmask of features that force read-only mount 1412 * @incompat: bitmask of incompatible features 1413 * 1414 * Mark a given journal feature as present on the 1415 * superblock. Returns true if the requested features could be set. 1416 * 1417 */ 1418 1419 int jbd2_journal_set_features (journal_t *journal, unsigned long compat, 1420 unsigned long ro, unsigned long incompat) 1421 { 1422 journal_superblock_t *sb; 1423 1424 if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) 1425 return 1; 1426 1427 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1428 return 0; 1429 1430 jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 1431 compat, ro, incompat); 1432 1433 sb = journal->j_superblock; 1434 1435 sb->s_feature_compat |= cpu_to_be32(compat); 1436 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1437 sb->s_feature_incompat |= cpu_to_be32(incompat); 1438 1439 return 1; 1440 } 1441 1442 /* 1443 * jbd2_journal_clear_features () - Clear a given journal feature in the 1444 * superblock 1445 * @journal: Journal to act on. 
1446 * @compat: bitmask of compatible features 1447 * @ro: bitmask of features that force read-only mount 1448 * @incompat: bitmask of incompatible features 1449 * 1450 * Clear a given journal feature as present on the 1451 * superblock. 1452 */ 1453 void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, 1454 unsigned long ro, unsigned long incompat) 1455 { 1456 journal_superblock_t *sb; 1457 1458 jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", 1459 compat, ro, incompat); 1460 1461 sb = journal->j_superblock; 1462 1463 sb->s_feature_compat &= ~cpu_to_be32(compat); 1464 sb->s_feature_ro_compat &= ~cpu_to_be32(ro); 1465 sb->s_feature_incompat &= ~cpu_to_be32(incompat); 1466 } 1467 EXPORT_SYMBOL(jbd2_journal_clear_features); 1468 1469 /** 1470 * int jbd2_journal_update_format () - Update on-disk journal structure. 1471 * @journal: Journal to act on. 1472 * 1473 * Given an initialised but unloaded journal struct, poke about in the 1474 * on-disk structure to update it to the most recent supported version. 
1475 */ 1476 int jbd2_journal_update_format (journal_t *journal) 1477 { 1478 journal_superblock_t *sb; 1479 int err; 1480 1481 err = journal_get_superblock(journal); 1482 if (err) 1483 return err; 1484 1485 sb = journal->j_superblock; 1486 1487 switch (be32_to_cpu(sb->s_header.h_blocktype)) { 1488 case JBD2_SUPERBLOCK_V2: 1489 return 0; 1490 case JBD2_SUPERBLOCK_V1: 1491 return journal_convert_superblock_v1(journal, sb); 1492 default: 1493 break; 1494 } 1495 return -EINVAL; 1496 } 1497 1498 static int journal_convert_superblock_v1(journal_t *journal, 1499 journal_superblock_t *sb) 1500 { 1501 int offset, blocksize; 1502 struct buffer_head *bh; 1503 1504 printk(KERN_WARNING 1505 "JBD: Converting superblock from version 1 to 2.\n"); 1506 1507 /* Pre-initialise new fields to zero */ 1508 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1509 blocksize = be32_to_cpu(sb->s_blocksize); 1510 memset(&sb->s_feature_compat, 0, blocksize-offset); 1511 1512 sb->s_nr_users = cpu_to_be32(1); 1513 sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); 1514 journal->j_format_version = 2; 1515 1516 bh = journal->j_sb_buffer; 1517 BUFFER_TRACE(bh, "marking dirty"); 1518 mark_buffer_dirty(bh); 1519 sync_dirty_buffer(bh); 1520 return 0; 1521 } 1522 1523 1524 /** 1525 * int jbd2_journal_flush () - Flush journal 1526 * @journal: Journal to act on. 1527 * 1528 * Flush all data for a given journal to disk and empty the journal. 1529 * Filesystems can use this when remounting readonly to ensure that 1530 * recovery does not need to happen on remount. 1531 */ 1532 1533 int jbd2_journal_flush(journal_t *journal) 1534 { 1535 int err = 0; 1536 transaction_t *transaction = NULL; 1537 unsigned long old_tail; 1538 1539 spin_lock(&journal->j_state_lock); 1540 1541 /* Force everything buffered to the log... 
*/ 1542 if (journal->j_running_transaction) { 1543 transaction = journal->j_running_transaction; 1544 __jbd2_log_start_commit(journal, transaction->t_tid); 1545 } else if (journal->j_committing_transaction) 1546 transaction = journal->j_committing_transaction; 1547 1548 /* Wait for the log commit to complete... */ 1549 if (transaction) { 1550 tid_t tid = transaction->t_tid; 1551 1552 spin_unlock(&journal->j_state_lock); 1553 jbd2_log_wait_commit(journal, tid); 1554 } else { 1555 spin_unlock(&journal->j_state_lock); 1556 } 1557 1558 /* ...and flush everything in the log out to disk. */ 1559 spin_lock(&journal->j_list_lock); 1560 while (!err && journal->j_checkpoint_transactions != NULL) { 1561 spin_unlock(&journal->j_list_lock); 1562 mutex_lock(&journal->j_checkpoint_mutex); 1563 err = jbd2_log_do_checkpoint(journal); 1564 mutex_unlock(&journal->j_checkpoint_mutex); 1565 spin_lock(&journal->j_list_lock); 1566 } 1567 spin_unlock(&journal->j_list_lock); 1568 1569 if (is_journal_aborted(journal)) 1570 return -EIO; 1571 1572 jbd2_cleanup_journal_tail(journal); 1573 1574 /* Finally, mark the journal as really needing no recovery. 1575 * This sets s_start==0 in the underlying superblock, which is 1576 * the magic code for a fully-recovered superblock. Any future 1577 * commits of data to the journal will restore the current 1578 * s_start value. 
*/ 1579 spin_lock(&journal->j_state_lock); 1580 old_tail = journal->j_tail; 1581 journal->j_tail = 0; 1582 spin_unlock(&journal->j_state_lock); 1583 jbd2_journal_update_superblock(journal, 1); 1584 spin_lock(&journal->j_state_lock); 1585 journal->j_tail = old_tail; 1586 1587 J_ASSERT(!journal->j_running_transaction); 1588 J_ASSERT(!journal->j_committing_transaction); 1589 J_ASSERT(!journal->j_checkpoint_transactions); 1590 J_ASSERT(journal->j_head == journal->j_tail); 1591 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1592 spin_unlock(&journal->j_state_lock); 1593 return 0; 1594 } 1595 1596 /** 1597 * int jbd2_journal_wipe() - Wipe journal contents 1598 * @journal: Journal to act on. 1599 * @write: flag (see below) 1600 * 1601 * Wipe out all of the contents of a journal, safely. This will produce 1602 * a warning if the journal contains any valid recovery information. 1603 * Must be called between journal_init_*() and jbd2_journal_load(). 1604 * 1605 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise 1606 * we merely suppress recovery. 1607 */ 1608 1609 int jbd2_journal_wipe(journal_t *journal, int write) 1610 { 1611 journal_superblock_t *sb; 1612 int err = 0; 1613 1614 J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 1615 1616 err = load_superblock(journal); 1617 if (err) 1618 return err; 1619 1620 sb = journal->j_superblock; 1621 1622 if (!journal->j_tail) 1623 goto no_recovery; 1624 1625 printk (KERN_WARNING "JBD: %s recovery information on journal\n", 1626 write ? "Clearing" : "Ignoring"); 1627 1628 err = jbd2_journal_skip_recovery(journal); 1629 if (write) 1630 jbd2_journal_update_superblock(journal, 1); 1631 1632 no_recovery: 1633 return err; 1634 } 1635 1636 /* 1637 * Journal abort has very specific semantics, which we describe 1638 * for journal abort. 1639 * 1640 * Two internal functions, which provide abort to the jbd layer 1641 * itself are here. 
1642 */ 1643 1644 /* 1645 * Quick version for internal journal use (doesn't lock the journal). 1646 * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, 1647 * and don't attempt to make any other journal updates. 1648 */ 1649 void __jbd2_journal_abort_hard(journal_t *journal) 1650 { 1651 transaction_t *transaction; 1652 1653 if (journal->j_flags & JBD2_ABORT) 1654 return; 1655 1656 printk(KERN_ERR "Aborting journal on device %s.\n", 1657 journal->j_devname); 1658 1659 spin_lock(&journal->j_state_lock); 1660 journal->j_flags |= JBD2_ABORT; 1661 transaction = journal->j_running_transaction; 1662 if (transaction) 1663 __jbd2_log_start_commit(journal, transaction->t_tid); 1664 spin_unlock(&journal->j_state_lock); 1665 } 1666 1667 /* Soft abort: record the abort error status in the journal superblock, 1668 * but don't do any other IO. */ 1669 static void __journal_abort_soft (journal_t *journal, int errno) 1670 { 1671 if (journal->j_flags & JBD2_ABORT) 1672 return; 1673 1674 if (!journal->j_errno) 1675 journal->j_errno = errno; 1676 1677 __jbd2_journal_abort_hard(journal); 1678 1679 if (errno) 1680 jbd2_journal_update_superblock(journal, 1); 1681 } 1682 1683 /** 1684 * void jbd2_journal_abort () - Shutdown the journal immediately. 1685 * @journal: the journal to shutdown. 1686 * @errno: an error number to record in the journal indicating 1687 * the reason for the shutdown. 1688 * 1689 * Perform a complete, immediate shutdown of the ENTIRE 1690 * journal (not of a single transaction). This operation cannot be 1691 * undone without closing and reopening the journal. 1692 * 1693 * The jbd2_journal_abort function is intended to support higher level error 1694 * recovery mechanisms such as the ext2/ext3 remount-readonly error 1695 * mode. 1696 * 1697 * Journal abort has very specific semantics. 
 * Any existing dirty,
 * unjournaled buffers in the main filesystem will still be written to
 * disk by bdflush, but the journaling mechanism will be suspended
 * immediately and no further transaction commits will be honoured.
 *
 * Any dirty, journaled buffers will be written back to disk without
 * hitting the journal.  Atomicity cannot be guaranteed on an aborted
 * filesystem, but we _do_ attempt to leave as much data as possible
 * behind for fsck to use for cleanup.
 *
 * Any attempt to get a new transaction handle on a journal which is in
 * ABORT state will just result in an -EROFS error return.  A
 * jbd2_journal_stop on an existing handle will return -EIO if we have
 * entered abort state during the update.
 *
 * Recursive transactions are not disturbed by journal abort until the
 * final jbd2_journal_stop, which will receive the -EIO error.
 *
 * Finally, the jbd2_journal_abort call allows the caller to supply an errno
 * which will be recorded (if possible) in the journal superblock.  This
 * allows a client to record failure conditions in the middle of a
 * transaction without having to complete the transaction to record the
 * failure to disk.  ext3_error, for example, now uses this
 * functionality.
 *
 * Errors which originate from within the journaling layer will NOT
 * supply an errno; a null errno implies that absolutely no further
 * writes are done to the journal (unless there are any already in
 * progress).
 *
 */

void jbd2_journal_abort(journal_t *journal, int errno)
{
	/* Exported entry point: all of the work is done by the soft abort. */
	__journal_abort_soft(journal, errno);
}

/**
 * int jbd2_journal_errno () - returns the journal's error state.
 * @journal: journal to examine.
1737 * 1738 * This is the errno number set with jbd2_journal_abort(), the last 1739 * time the journal was mounted - if the journal was stopped 1740 * without calling abort this will be 0. 1741 * 1742 * If the journal has been aborted on this mount time -EROFS will 1743 * be returned. 1744 */ 1745 int jbd2_journal_errno(journal_t *journal) 1746 { 1747 int err; 1748 1749 spin_lock(&journal->j_state_lock); 1750 if (journal->j_flags & JBD2_ABORT) 1751 err = -EROFS; 1752 else 1753 err = journal->j_errno; 1754 spin_unlock(&journal->j_state_lock); 1755 return err; 1756 } 1757 1758 /** 1759 * int jbd2_journal_clear_err () - clears the journal's error state 1760 * @journal: journal to act on. 1761 * 1762 * An error must be cleared or acked to take a FS out of readonly 1763 * mode. 1764 */ 1765 int jbd2_journal_clear_err(journal_t *journal) 1766 { 1767 int err = 0; 1768 1769 spin_lock(&journal->j_state_lock); 1770 if (journal->j_flags & JBD2_ABORT) 1771 err = -EROFS; 1772 else 1773 journal->j_errno = 0; 1774 spin_unlock(&journal->j_state_lock); 1775 return err; 1776 } 1777 1778 /** 1779 * void jbd2_journal_ack_err() - Ack journal err. 1780 * @journal: journal to act on. 1781 * 1782 * An error must be cleared or acked to take a FS out of readonly 1783 * mode. 1784 */ 1785 void jbd2_journal_ack_err(journal_t *journal) 1786 { 1787 spin_lock(&journal->j_state_lock); 1788 if (journal->j_errno) 1789 journal->j_flags |= JBD2_ACK_ERR; 1790 spin_unlock(&journal->j_state_lock); 1791 } 1792 1793 int jbd2_journal_blocks_per_page(struct inode *inode) 1794 { 1795 return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 1796 } 1797 1798 /* 1799 * helper functions to deal with 32 or 64bit block numbers. 
1800 */ 1801 size_t journal_tag_bytes(journal_t *journal) 1802 { 1803 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 1804 return JBD2_TAG_SIZE64; 1805 else 1806 return JBD2_TAG_SIZE32; 1807 } 1808 1809 /* 1810 * Journal_head storage management 1811 */ 1812 static struct kmem_cache *jbd2_journal_head_cache; 1813 #ifdef CONFIG_JBD2_DEBUG 1814 static atomic_t nr_journal_heads = ATOMIC_INIT(0); 1815 #endif 1816 1817 static int journal_init_jbd2_journal_head_cache(void) 1818 { 1819 int retval; 1820 1821 J_ASSERT(jbd2_journal_head_cache == NULL); 1822 jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 1823 sizeof(struct journal_head), 1824 0, /* offset */ 1825 SLAB_TEMPORARY, /* flags */ 1826 NULL); /* ctor */ 1827 retval = 0; 1828 if (!jbd2_journal_head_cache) { 1829 retval = -ENOMEM; 1830 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 1831 } 1832 return retval; 1833 } 1834 1835 static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1836 { 1837 if (jbd2_journal_head_cache) { 1838 kmem_cache_destroy(jbd2_journal_head_cache); 1839 jbd2_journal_head_cache = NULL; 1840 } 1841 } 1842 1843 /* 1844 * journal_head splicing and dicing 1845 */ 1846 static struct journal_head *journal_alloc_journal_head(void) 1847 { 1848 struct journal_head *ret; 1849 static unsigned long last_warning; 1850 1851 #ifdef CONFIG_JBD2_DEBUG 1852 atomic_inc(&nr_journal_heads); 1853 #endif 1854 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1855 if (!ret) { 1856 jbd_debug(1, "out of memory for journal_head\n"); 1857 if (time_after(jiffies, last_warning + 5*HZ)) { 1858 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1859 __func__); 1860 last_warning = jiffies; 1861 } 1862 while (!ret) { 1863 yield(); 1864 ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); 1865 } 1866 } 1867 return ret; 1868 } 1869 1870 static void journal_free_journal_head(struct journal_head *jh) 1871 { 1872 #ifdef CONFIG_JBD2_DEBUG 1873 
atomic_dec(&nr_journal_heads); 1874 memset(jh, JBD2_POISON_FREE, sizeof(*jh)); 1875 #endif 1876 kmem_cache_free(jbd2_journal_head_cache, jh); 1877 } 1878 1879 /* 1880 * A journal_head is attached to a buffer_head whenever JBD has an 1881 * interest in the buffer. 1882 * 1883 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit 1884 * is set. This bit is tested in core kernel code where we need to take 1885 * JBD-specific actions. Testing the zeroness of ->b_private is not reliable 1886 * there. 1887 * 1888 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. 1889 * 1890 * When a buffer has its BH_JBD bit set it is immune from being released by 1891 * core kernel code, mainly via ->b_count. 1892 * 1893 * A journal_head may be detached from its buffer_head when the journal_head's 1894 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 1895 * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the 1896 * journal_head can be dropped if needed. 1897 * 1898 * Various places in the kernel want to attach a journal_head to a buffer_head 1899 * _before_ attaching the journal_head to a transaction. To protect the 1900 * journal_head in this situation, jbd2_journal_add_journal_head elevates the 1901 * journal_head's b_jcount refcount by one. The caller must call 1902 * jbd2_journal_put_journal_head() to undo this. 1903 * 1904 * So the typical usage would be: 1905 * 1906 * (Attach a journal_head if needed. Increments b_jcount) 1907 * struct journal_head *jh = jbd2_journal_add_journal_head(bh); 1908 * ... 1909 * jh->b_transaction = xxx; 1910 * jbd2_journal_put_journal_head(jh); 1911 * 1912 * Now, the journal_head's b_jcount is zero, but it is safe from being released 1913 * because it has a non-zero b_transaction. 1914 */ 1915 1916 /* 1917 * Give a buffer_head a journal_head. 1918 * 1919 * Doesn't need the journal lock. 1920 * May sleep. 
 */
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh;
	struct journal_head *new_jh = NULL;

repeat:
	/*
	 * Allocate a zeroed candidate before the journal-head lock is
	 * taken, but only if the buffer does not appear to have one yet.
	 */
	if (!buffer_jbd(bh)) {
		new_jh = journal_alloc_journal_head();
		memset(new_jh, 0, sizeof(*new_jh));
	}

	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {
		/* A journal_head is already attached: use that one. */
		jh = bh2jh(bh);
	} else {
		J_ASSERT_BH(bh,
			(atomic_read(&bh->b_count) > 0) ||
			(bh->b_page && bh->b_page->mapping));

		/*
		 * The buffer had a journal_head at the unlocked check above
		 * but has lost it since, so nothing was allocated.  Drop
		 * the lock and start over.
		 */
		if (!new_jh) {
			jbd_unlock_bh_journal_head(bh);
			goto repeat;
		}

		jh = new_jh;
		new_jh = NULL;		/* We consumed it */
		set_buffer_jbd(bh);
		bh->b_private = jh;
		jh->b_bh = bh;
		/* The attached journal_head holds a reference on the bh. */
		get_bh(bh);
		BUFFER_TRACE(bh, "added journal_head");
	}
	/* Reference for the caller. */
	jh->b_jcount++;
	jbd_unlock_bh_journal_head(bh);
	/* Someone else attached a journal_head first: free the spare. */
	if (new_jh)
		journal_free_journal_head(new_jh);
	return bh->b_private;
}

/*
 * Grab a ref against this buffer_head's journal_head.
If it ended up not 1963 * having a journal_head, return NULL 1964 */ 1965 struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) 1966 { 1967 struct journal_head *jh = NULL; 1968 1969 jbd_lock_bh_journal_head(bh); 1970 if (buffer_jbd(bh)) { 1971 jh = bh2jh(bh); 1972 jh->b_jcount++; 1973 } 1974 jbd_unlock_bh_journal_head(bh); 1975 return jh; 1976 } 1977 1978 static void __journal_remove_journal_head(struct buffer_head *bh) 1979 { 1980 struct journal_head *jh = bh2jh(bh); 1981 1982 J_ASSERT_JH(jh, jh->b_jcount >= 0); 1983 1984 get_bh(bh); 1985 if (jh->b_jcount == 0) { 1986 if (jh->b_transaction == NULL && 1987 jh->b_next_transaction == NULL && 1988 jh->b_cp_transaction == NULL) { 1989 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1990 J_ASSERT_BH(bh, buffer_jbd(bh)); 1991 J_ASSERT_BH(bh, jh2bh(jh) == bh); 1992 BUFFER_TRACE(bh, "remove journal_head"); 1993 if (jh->b_frozen_data) { 1994 printk(KERN_WARNING "%s: freeing " 1995 "b_frozen_data\n", 1996 __func__); 1997 jbd2_free(jh->b_frozen_data, bh->b_size); 1998 } 1999 if (jh->b_committed_data) { 2000 printk(KERN_WARNING "%s: freeing " 2001 "b_committed_data\n", 2002 __func__); 2003 jbd2_free(jh->b_committed_data, bh->b_size); 2004 } 2005 bh->b_private = NULL; 2006 jh->b_bh = NULL; /* debug, really */ 2007 clear_buffer_jbd(bh); 2008 __brelse(bh); 2009 journal_free_journal_head(jh); 2010 } else { 2011 BUFFER_TRACE(bh, "journal_head was locked"); 2012 } 2013 } 2014 } 2015 2016 /* 2017 * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction 2018 * and has a zero b_jcount then remove and release its journal_head. If we did 2019 * see that the buffer is not used by any transaction we also "logically" 2020 * decrement ->b_count. 2021 * 2022 * We in fact take an additional increment on ->b_count as a convenience, 2023 * because the caller usually wants to do additional things with the bh 2024 * after calling here. 
2025 * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some 2026 * time. Once the caller has run __brelse(), the buffer is eligible for 2027 * reaping by try_to_free_buffers(). 2028 */ 2029 void jbd2_journal_remove_journal_head(struct buffer_head *bh) 2030 { 2031 jbd_lock_bh_journal_head(bh); 2032 __journal_remove_journal_head(bh); 2033 jbd_unlock_bh_journal_head(bh); 2034 } 2035 2036 /* 2037 * Drop a reference on the passed journal_head. If it fell to zero then try to 2038 * release the journal_head from the buffer_head. 2039 */ 2040 void jbd2_journal_put_journal_head(struct journal_head *jh) 2041 { 2042 struct buffer_head *bh = jh2bh(jh); 2043 2044 jbd_lock_bh_journal_head(bh); 2045 J_ASSERT_JH(jh, jh->b_jcount > 0); 2046 --jh->b_jcount; 2047 if (!jh->b_jcount && !jh->b_transaction) { 2048 __journal_remove_journal_head(bh); 2049 __brelse(bh); 2050 } 2051 jbd_unlock_bh_journal_head(bh); 2052 } 2053 2054 /* 2055 * Initialize jbd inode head 2056 */ 2057 void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) 2058 { 2059 jinode->i_transaction = NULL; 2060 jinode->i_next_transaction = NULL; 2061 jinode->i_vfs_inode = inode; 2062 jinode->i_flags = 0; 2063 INIT_LIST_HEAD(&jinode->i_list); 2064 } 2065 2066 /* 2067 * Function to be called before we start removing inode from memory (i.e., 2068 * clear_inode() is a fine place to be called from). It removes inode from 2069 * transaction's lists. 
2070 */ 2071 void jbd2_journal_release_jbd_inode(journal_t *journal, 2072 struct jbd2_inode *jinode) 2073 { 2074 int writeout = 0; 2075 2076 if (!journal) 2077 return; 2078 restart: 2079 spin_lock(&journal->j_list_lock); 2080 /* Is commit writing out inode - we have to wait */ 2081 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2082 wait_queue_head_t *wq; 2083 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2084 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2085 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2086 spin_unlock(&journal->j_list_lock); 2087 schedule(); 2088 finish_wait(wq, &wait.wait); 2089 goto restart; 2090 } 2091 2092 /* Do we need to wait for data writeback? */ 2093 if (journal->j_committing_transaction == jinode->i_transaction) 2094 writeout = 1; 2095 if (jinode->i_transaction) { 2096 list_del(&jinode->i_list); 2097 jinode->i_transaction = NULL; 2098 } 2099 spin_unlock(&journal->j_list_lock); 2100 } 2101 2102 /* 2103 * debugfs tunables 2104 */ 2105 #ifdef CONFIG_JBD2_DEBUG 2106 u8 jbd2_journal_enable_debug __read_mostly; 2107 EXPORT_SYMBOL(jbd2_journal_enable_debug); 2108 2109 #define JBD2_DEBUG_NAME "jbd2-debug" 2110 2111 static struct dentry *jbd2_debugfs_dir; 2112 static struct dentry *jbd2_debug; 2113 2114 static void __init jbd2_create_debugfs_entry(void) 2115 { 2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); 2117 if (jbd2_debugfs_dir) 2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, 2119 S_IRUGO | S_IWUSR, 2120 jbd2_debugfs_dir, 2121 &jbd2_journal_enable_debug); 2122 } 2123 2124 static void __exit jbd2_remove_debugfs_entry(void) 2125 { 2126 debugfs_remove(jbd2_debug); 2127 debugfs_remove(jbd2_debugfs_dir); 2128 } 2129 2130 #else 2131 2132 static void __init jbd2_create_debugfs_entry(void) 2133 { 2134 } 2135 2136 static void __exit jbd2_remove_debugfs_entry(void) 2137 { 2138 } 2139 2140 #endif 2141 2142 #ifdef CONFIG_PROC_FS 2143 2144 #define JBD2_STATS_PROC_NAME "fs/jbd2" 2145 2146 static void 
__init jbd2_create_jbd_stats_proc_entry(void) 2147 { 2148 proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL); 2149 } 2150 2151 static void __exit jbd2_remove_jbd_stats_proc_entry(void) 2152 { 2153 if (proc_jbd2_stats) 2154 remove_proc_entry(JBD2_STATS_PROC_NAME, NULL); 2155 } 2156 2157 #else 2158 2159 #define jbd2_create_jbd_stats_proc_entry() do {} while (0) 2160 #define jbd2_remove_jbd_stats_proc_entry() do {} while (0) 2161 2162 #endif 2163 2164 struct kmem_cache *jbd2_handle_cache; 2165 2166 static int __init journal_init_handle_cache(void) 2167 { 2168 jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", 2169 sizeof(handle_t), 2170 0, /* offset */ 2171 SLAB_TEMPORARY, /* flags */ 2172 NULL); /* ctor */ 2173 if (jbd2_handle_cache == NULL) { 2174 printk(KERN_EMERG "JBD: failed to create handle cache\n"); 2175 return -ENOMEM; 2176 } 2177 return 0; 2178 } 2179 2180 static void jbd2_journal_destroy_handle_cache(void) 2181 { 2182 if (jbd2_handle_cache) 2183 kmem_cache_destroy(jbd2_handle_cache); 2184 } 2185 2186 /* 2187 * Module startup and shutdown 2188 */ 2189 2190 static int __init journal_init_caches(void) 2191 { 2192 int ret; 2193 2194 ret = jbd2_journal_init_revoke_caches(); 2195 if (ret == 0) 2196 ret = journal_init_jbd2_journal_head_cache(); 2197 if (ret == 0) 2198 ret = journal_init_handle_cache(); 2199 return ret; 2200 } 2201 2202 static void jbd2_journal_destroy_caches(void) 2203 { 2204 jbd2_journal_destroy_revoke_caches(); 2205 jbd2_journal_destroy_jbd2_journal_head_cache(); 2206 jbd2_journal_destroy_handle_cache(); 2207 } 2208 2209 static int __init journal_init(void) 2210 { 2211 int ret; 2212 2213 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2214 2215 ret = journal_init_caches(); 2216 if (ret == 0) { 2217 jbd2_create_debugfs_entry(); 2218 jbd2_create_jbd_stats_proc_entry(); 2219 } else { 2220 jbd2_journal_destroy_caches(); 2221 } 2222 return ret; 2223 } 2224 2225 static void __exit journal_exit(void) 2226 { 2227 #ifdef 
CONFIG_JBD2_DEBUG 2228 int n = atomic_read(&nr_journal_heads); 2229 if (n) 2230 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2231 #endif 2232 jbd2_remove_debugfs_entry(); 2233 jbd2_remove_jbd_stats_proc_entry(); 2234 jbd2_journal_destroy_caches(); 2235 } 2236 2237 /* 2238 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 2239 * tracing infrastructure to map a dev_t to a device name. 2240 * 2241 * The caller should use rcu_read_lock() in order to make sure the 2242 * device name stays valid until its done with it. We use 2243 * rcu_read_lock() as well to make sure we're safe in case the caller 2244 * gets sloppy, and because rcu_read_lock() is cheap and can be safely 2245 * nested. 2246 */ 2247 struct devname_cache { 2248 struct rcu_head rcu; 2249 dev_t device; 2250 char devname[BDEVNAME_SIZE]; 2251 }; 2252 #define CACHE_SIZE_BITS 6 2253 static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; 2254 static DEFINE_SPINLOCK(devname_cache_lock); 2255 2256 static void free_devcache(struct rcu_head *rcu) 2257 { 2258 kfree(rcu); 2259 } 2260 2261 const char *jbd2_dev_to_name(dev_t device) 2262 { 2263 int i = hash_32(device, CACHE_SIZE_BITS); 2264 char *ret; 2265 struct block_device *bd; 2266 static struct devname_cache *new_dev; 2267 2268 rcu_read_lock(); 2269 if (devcache[i] && devcache[i]->device == device) { 2270 ret = devcache[i]->devname; 2271 rcu_read_unlock(); 2272 return ret; 2273 } 2274 rcu_read_unlock(); 2275 2276 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2277 if (!new_dev) 2278 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ 2279 spin_lock(&devname_cache_lock); 2280 if (devcache[i]) { 2281 if (devcache[i]->device == device) { 2282 kfree(new_dev); 2283 ret = devcache[i]->devname; 2284 spin_unlock(&devname_cache_lock); 2285 return ret; 2286 } 2287 call_rcu(&devcache[i]->rcu, free_devcache); 2288 } 2289 devcache[i] = new_dev; 2290 devcache[i]->device = device; 2291 bd = bdget(device); 2292 if (bd) { 2293 
bdevname(bd, devcache[i]->devname); 2294 bdput(bd); 2295 } else 2296 __bdevname(device, devcache[i]->devname); 2297 ret = devcache[i]->devname; 2298 spin_unlock(&devname_cache_lock); 2299 return ret; 2300 } 2301 EXPORT_SYMBOL(jbd2_dev_to_name); 2302 2303 MODULE_LICENSE("GPL"); 2304 module_init(journal_init); 2305 module_exit(journal_exit); 2306 2307