1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/smp_lock.h> 30 #include <linux/buffer_head.h> 31 #include <linux/exportfs.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 #include <linux/proc_fs.h> 39 #include <linux/ctype.h> 40 #include <linux/log2.h> 41 #include <linux/crc16.h> 42 #include <asm/uaccess.h> 43 44 #include "ext4.h" 45 #include "ext4_jbd2.h" 46 #include "xattr.h" 47 #include "acl.h" 48 #include "mballoc.h" 49 50 #define CREATE_TRACE_POINTS 51 #include <trace/events/ext4.h> 52 53 struct proc_dir_entry *ext4_proc_root; 54 static struct kset *ext4_kset; 55 56 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 57 unsigned long journal_devnum); 58 static int ext4_commit_super(struct super_block *sb, int sync); 59 static void ext4_mark_recovery_complete(struct super_block *sb, 60 struct ext4_super_block *es); 61 static void ext4_clear_journal_err(struct super_block *sb, 62 struct ext4_super_block *es); 63 static int ext4_sync_fs(struct super_block *sb, int wait); 64 static const char *ext4_decode_error(struct super_block *sb, int errno, 65 char nbuf[16]); 66 static int ext4_remount(struct super_block *sb, int *flags, char *data); 67 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 68 static int ext4_unfreeze(struct super_block *sb); 69 static void ext4_write_super(struct super_block *sb); 70 static int ext4_freeze(struct super_block *sb); 71 72 73 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 74 struct ext4_group_desc *bg) 75 { 76 return le32_to_cpu(bg->bg_block_bitmap_lo) | 77 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 78 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 79 } 80 81 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 82 struct ext4_group_desc *bg) 83 { 84 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 85 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 86 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 87 } 88 89 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 90 struct ext4_group_desc *bg) 91 { 92 return le32_to_cpu(bg->bg_inode_table_lo) | 93 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 94 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 95 } 96 97 __u32 ext4_free_blks_count(struct super_block *sb, 98 struct ext4_group_desc *bg) 99 { 100 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 101 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 102 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 103 } 104 105 __u32 ext4_free_inodes_count(struct super_block *sb, 106 struct ext4_group_desc *bg) 107 { 108 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 109 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 110 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 111 } 112 113 __u32 ext4_used_dirs_count(struct super_block *sb, 114 struct ext4_group_desc *bg) 115 { 116 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 117 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 118 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 119 } 120 121 __u32 ext4_itable_unused_count(struct super_block *sb, 122 struct ext4_group_desc *bg) 123 { 124 return le16_to_cpu(bg->bg_itable_unused_lo) | 125 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 126 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 127 } 128 129 void ext4_block_bitmap_set(struct super_block *sb, 130 struct ext4_group_desc *bg, ext4_fsblk_t blk) 131 { 132 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 133 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 134 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 135 } 136 137 void ext4_inode_bitmap_set(struct super_block *sb, 138 struct ext4_group_desc *bg, ext4_fsblk_t blk) 139 { 140 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 141 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 142 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 143 } 144 145 void ext4_inode_table_set(struct super_block *sb, 146 struct ext4_group_desc *bg, ext4_fsblk_t blk) 147 { 148 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 149 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 150 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 151 } 152 153 void ext4_free_blks_set(struct super_block *sb, 154 struct ext4_group_desc *bg, __u32 count) 155 { 156 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 157 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 158 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 159 } 160 161 void ext4_free_inodes_set(struct super_block *sb, 162 struct ext4_group_desc *bg, __u32 count) 163 { 164 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 165 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 166 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 167 } 168 169 void ext4_used_dirs_set(struct super_block *sb, 170 struct ext4_group_desc *bg, __u32 count) 171 { 172 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 173 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 174 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 175 } 176 177 void ext4_itable_unused_set(struct super_block *sb, 178 struct ext4_group_desc *bg, __u32 count) 179 { 180 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 181 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 182 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 183 } 184 185 186 /* Just increment the non-pointer handle value */ 187 static handle_t *ext4_get_nojournal(void) 188 { 189 handle_t *handle = current->journal_info; 190 unsigned long ref_cnt = (unsigned long)handle; 191 192 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 193 194 ref_cnt++; 195 handle = (handle_t *)ref_cnt; 196 197 current->journal_info = handle; 198 return handle; 199 } 200 201 202 /* Decrement the non-pointer handle value */ 203 static void ext4_put_nojournal(handle_t *handle) 204 { 205 unsigned long ref_cnt = (unsigned long)handle; 206 207 BUG_ON(ref_cnt == 0); 208 209 ref_cnt--; 210 handle = (handle_t *)ref_cnt; 211 212 current->journal_info = handle; 213 } 214 215 /* 216 * Wrappers for jbd2_journal_start/end. 217 * 218 * The only special thing we need to do here is to make sure that all 219 * journal_end calls result in the superblock being marked dirty, so 220 * that sync() will call the filesystem's write_super callback if 221 * appropriate. 222 */ 223 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 224 { 225 journal_t *journal; 226 227 if (sb->s_flags & MS_RDONLY) 228 return ERR_PTR(-EROFS); 229 230 /* Special case here: if the journal has aborted behind our 231 * backs (eg. EIO in the commit thread), then we still need to 232 * take the FS itself readonly cleanly. */ 233 journal = EXT4_SB(sb)->s_journal; 234 if (journal) { 235 if (is_journal_aborted(journal)) { 236 ext4_abort(sb, __func__, "Detected aborted journal"); 237 return ERR_PTR(-EROFS); 238 } 239 return jbd2_journal_start(journal, nblocks); 240 } 241 return ext4_get_nojournal(); 242 } 243 244 /* 245 * The only special thing we need to do here is to make sure that all 246 * jbd2_journal_stop calls result in the superblock being marked dirty, so 247 * that sync() will call the filesystem's write_super callback if 248 * appropriate. 249 */ 250 int __ext4_journal_stop(const char *where, handle_t *handle) 251 { 252 struct super_block *sb; 253 int err; 254 int rc; 255 256 if (!ext4_handle_valid(handle)) { 257 ext4_put_nojournal(handle); 258 return 0; 259 } 260 sb = handle->h_transaction->t_journal->j_private; 261 err = handle->h_err; 262 rc = jbd2_journal_stop(handle); 263 264 if (!err) 265 err = rc; 266 if (err) 267 __ext4_std_error(sb, where, err); 268 return err; 269 } 270 271 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 272 struct buffer_head *bh, handle_t *handle, int err) 273 { 274 char nbuf[16]; 275 const char *errstr = ext4_decode_error(NULL, err, nbuf); 276 277 BUG_ON(!ext4_handle_valid(handle)); 278 279 if (bh) 280 BUFFER_TRACE(bh, "abort"); 281 282 if (!handle->h_err) 283 handle->h_err = err; 284 285 if (is_handle_aborted(handle)) 286 return; 287 288 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 289 caller, errstr, err_fn); 290 291 jbd2_journal_abort_handle(handle); 292 } 293 294 /* Deal with the reporting of failure conditions on a filesystem such as 295 * inconsistencies detected or read IO failures. 296 * 297 * On ext2, we can store the error state of the filesystem in the 298 * superblock. That is not possible on ext4, because we may have other 299 * write ordering constraints on the superblock which prevent us from 300 * writing it out straight away; and given that the journal is about to 301 * be aborted, we can't rely on the current, or future, transactions to 302 * write out the superblock safely. 303 * 304 * We'll just use the jbd2_journal_abort() error code to record an error in 305 * the journal instead. On recovery, the journal will compain about 306 * that error until we've noted it down and cleared it. 307 */ 308 309 static void ext4_handle_error(struct super_block *sb) 310 { 311 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 312 313 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 314 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 315 316 if (sb->s_flags & MS_RDONLY) 317 return; 318 319 if (!test_opt(sb, ERRORS_CONT)) { 320 journal_t *journal = EXT4_SB(sb)->s_journal; 321 322 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 323 if (journal) 324 jbd2_journal_abort(journal, -EIO); 325 } 326 if (test_opt(sb, ERRORS_RO)) { 327 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 328 sb->s_flags |= MS_RDONLY; 329 } 330 ext4_commit_super(sb, 1); 331 if (test_opt(sb, ERRORS_PANIC)) 332 panic("EXT4-fs (device %s): panic forced after error\n", 333 sb->s_id); 334 } 335 336 void ext4_error(struct super_block *sb, const char *function, 337 const char *fmt, ...) 338 { 339 va_list args; 340 341 va_start(args, fmt); 342 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 343 vprintk(fmt, args); 344 printk("\n"); 345 va_end(args); 346 347 ext4_handle_error(sb); 348 } 349 350 static const char *ext4_decode_error(struct super_block *sb, int errno, 351 char nbuf[16]) 352 { 353 char *errstr = NULL; 354 355 switch (errno) { 356 case -EIO: 357 errstr = "IO failure"; 358 break; 359 case -ENOMEM: 360 errstr = "Out of memory"; 361 break; 362 case -EROFS: 363 if (!sb || (EXT4_SB(sb)->s_journal && 364 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 365 errstr = "Journal has aborted"; 366 else 367 errstr = "Readonly filesystem"; 368 break; 369 default: 370 /* If the caller passed in an extra buffer for unknown 371 * errors, textualise them now. Else we just return 372 * NULL. */ 373 if (nbuf) { 374 /* Check for truncated error codes... */ 375 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 376 errstr = nbuf; 377 } 378 break; 379 } 380 381 return errstr; 382 } 383 384 /* __ext4_std_error decodes expected errors from journaling functions 385 * automatically and invokes the appropriate error response. */ 386 387 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 388 { 389 char nbuf[16]; 390 const char *errstr; 391 392 /* Special case: if the error is EROFS, and we're not already 393 * inside a transaction, then there's really no point in logging 394 * an error. */ 395 if (errno == -EROFS && journal_current_handle() == NULL && 396 (sb->s_flags & MS_RDONLY)) 397 return; 398 399 errstr = ext4_decode_error(sb, errno, nbuf); 400 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 401 sb->s_id, function, errstr); 402 403 ext4_handle_error(sb); 404 } 405 406 /* 407 * ext4_abort is a much stronger failure handler than ext4_error. The 408 * abort function may be used to deal with unrecoverable failures such 409 * as journal IO errors or ENOMEM at a critical moment in log management. 410 * 411 * We unconditionally force the filesystem into an ABORT|READONLY state, 412 * unless the error response on the fs has been set to panic in which 413 * case we take the easy way out and panic immediately. 414 */ 415 416 void ext4_abort(struct super_block *sb, const char *function, 417 const char *fmt, ...) 418 { 419 va_list args; 420 421 va_start(args, fmt); 422 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 423 vprintk(fmt, args); 424 printk("\n"); 425 va_end(args); 426 427 if (test_opt(sb, ERRORS_PANIC)) 428 panic("EXT4-fs panic from previous error\n"); 429 430 if (sb->s_flags & MS_RDONLY) 431 return; 432 433 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 434 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 435 sb->s_flags |= MS_RDONLY; 436 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 437 if (EXT4_SB(sb)->s_journal) 438 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 439 } 440 441 void ext4_msg (struct super_block * sb, const char *prefix, 442 const char *fmt, ...) 443 { 444 va_list args; 445 446 va_start(args, fmt); 447 printk("%sEXT4-fs (%s): ", prefix, sb->s_id); 448 vprintk(fmt, args); 449 printk("\n"); 450 va_end(args); 451 } 452 453 void ext4_warning(struct super_block *sb, const char *function, 454 const char *fmt, ...) 455 { 456 va_list args; 457 458 va_start(args, fmt); 459 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 460 sb->s_id, function); 461 vprintk(fmt, args); 462 printk("\n"); 463 va_end(args); 464 } 465 466 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, 467 const char *function, const char *fmt, ...) 468 __releases(bitlock) 469 __acquires(bitlock) 470 { 471 va_list args; 472 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 473 474 va_start(args, fmt); 475 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 476 vprintk(fmt, args); 477 printk("\n"); 478 va_end(args); 479 480 if (test_opt(sb, ERRORS_CONT)) { 481 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 482 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 483 ext4_commit_super(sb, 0); 484 return; 485 } 486 ext4_unlock_group(sb, grp); 487 ext4_handle_error(sb); 488 /* 489 * We only get here in the ERRORS_RO case; relocking the group 490 * may be dangerous, but nothing bad will happen since the 491 * filesystem will have already been marked read/only and the 492 * journal has been aborted. We return 1 as a hint to callers 493 * who might what to use the return value from 494 * ext4_grp_locked_error() to distinguish beween the 495 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 496 * aggressively from the ext4 function in question, with a 497 * more appropriate error code. 498 */ 499 ext4_lock_group(sb, grp); 500 return; 501 } 502 503 void ext4_update_dynamic_rev(struct super_block *sb) 504 { 505 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 506 507 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 508 return; 509 510 ext4_warning(sb, __func__, 511 "updating to rev %d because of new feature flag, " 512 "running e2fsck is recommended", 513 EXT4_DYNAMIC_REV); 514 515 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 516 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 517 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 518 /* leave es->s_feature_*compat flags alone */ 519 /* es->s_uuid will be set by e2fsck if empty */ 520 521 /* 522 * The rest of the superblock fields should be zero, and if not it 523 * means they are likely already in use, so leave them alone. We 524 * can leave it up to e2fsck to clean up any inconsistencies there. 525 */ 526 } 527 528 /* 529 * Open the external journal device 530 */ 531 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 532 { 533 struct block_device *bdev; 534 char b[BDEVNAME_SIZE]; 535 536 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 537 if (IS_ERR(bdev)) 538 goto fail; 539 return bdev; 540 541 fail: 542 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 543 __bdevname(dev, b), PTR_ERR(bdev)); 544 return NULL; 545 } 546 547 /* 548 * Release the journal device 549 */ 550 static int ext4_blkdev_put(struct block_device *bdev) 551 { 552 bd_release(bdev); 553 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 554 } 555 556 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 557 { 558 struct block_device *bdev; 559 int ret = -ENODEV; 560 561 bdev = sbi->journal_bdev; 562 if (bdev) { 563 ret = ext4_blkdev_put(bdev); 564 sbi->journal_bdev = NULL; 565 } 566 return ret; 567 } 568 569 static inline struct inode *orphan_list_entry(struct list_head *l) 570 { 571 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 572 } 573 574 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 575 { 576 struct list_head *l; 577 578 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 579 le32_to_cpu(sbi->s_es->s_last_orphan)); 580 581 printk(KERN_ERR "sb_info orphan list:\n"); 582 list_for_each(l, &sbi->s_orphan) { 583 struct inode *inode = orphan_list_entry(l); 584 printk(KERN_ERR " " 585 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 586 inode->i_sb->s_id, inode->i_ino, inode, 587 inode->i_mode, inode->i_nlink, 588 NEXT_ORPHAN(inode)); 589 } 590 } 591 592 static void ext4_put_super(struct super_block *sb) 593 { 594 struct ext4_sb_info *sbi = EXT4_SB(sb); 595 struct ext4_super_block *es = sbi->s_es; 596 int i, err; 597 598 flush_workqueue(sbi->dio_unwritten_wq); 599 destroy_workqueue(sbi->dio_unwritten_wq); 600 601 lock_super(sb); 602 lock_kernel(); 603 if (sb->s_dirt) 604 ext4_commit_super(sb, 1); 605 606 if (sbi->s_journal) { 607 err = jbd2_journal_destroy(sbi->s_journal); 608 sbi->s_journal = NULL; 609 if (err < 0) 610 ext4_abort(sb, __func__, 611 "Couldn't clean up the journal"); 612 } 613 614 ext4_release_system_zone(sb); 615 ext4_mb_release(sb); 616 ext4_ext_release(sb); 617 ext4_xattr_put_super(sb); 618 619 if (!(sb->s_flags & MS_RDONLY)) { 620 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 621 es->s_state = cpu_to_le16(sbi->s_mount_state); 622 ext4_commit_super(sb, 1); 623 } 624 if (sbi->s_proc) { 625 remove_proc_entry(sb->s_id, ext4_proc_root); 626 } 627 kobject_del(&sbi->s_kobj); 628 629 for (i = 0; i < sbi->s_gdb_count; i++) 630 brelse(sbi->s_group_desc[i]); 631 kfree(sbi->s_group_desc); 632 if (is_vmalloc_addr(sbi->s_flex_groups)) 633 vfree(sbi->s_flex_groups); 634 else 635 kfree(sbi->s_flex_groups); 636 percpu_counter_destroy(&sbi->s_freeblocks_counter); 637 percpu_counter_destroy(&sbi->s_freeinodes_counter); 638 percpu_counter_destroy(&sbi->s_dirs_counter); 639 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 640 brelse(sbi->s_sbh); 641 #ifdef CONFIG_QUOTA 642 for (i = 0; i < MAXQUOTAS; i++) 643 kfree(sbi->s_qf_names[i]); 644 #endif 645 646 /* Debugging code just in case the in-memory inode orphan list 647 * isn't empty. The on-disk one can be non-empty if we've 648 * detected an error and taken the fs readonly, but the 649 * in-memory list had better be clean by this point. */ 650 if (!list_empty(&sbi->s_orphan)) 651 dump_orphan_list(sb, sbi); 652 J_ASSERT(list_empty(&sbi->s_orphan)); 653 654 invalidate_bdev(sb->s_bdev); 655 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 656 /* 657 * Invalidate the journal device's buffers. We don't want them 658 * floating about in memory - the physical journal device may 659 * hotswapped, and it breaks the `ro-after' testing code. 660 */ 661 sync_blockdev(sbi->journal_bdev); 662 invalidate_bdev(sbi->journal_bdev); 663 ext4_blkdev_remove(sbi); 664 } 665 sb->s_fs_info = NULL; 666 /* 667 * Now that we are completely done shutting down the 668 * superblock, we need to actually destroy the kobject. 669 */ 670 unlock_kernel(); 671 unlock_super(sb); 672 kobject_put(&sbi->s_kobj); 673 wait_for_completion(&sbi->s_kobj_unregister); 674 kfree(sbi->s_blockgroup_lock); 675 kfree(sbi); 676 } 677 678 static struct kmem_cache *ext4_inode_cachep; 679 680 /* 681 * Called inside transaction, so use GFP_NOFS 682 */ 683 static struct inode *ext4_alloc_inode(struct super_block *sb) 684 { 685 struct ext4_inode_info *ei; 686 687 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 688 if (!ei) 689 return NULL; 690 691 ei->vfs_inode.i_version = 1; 692 ei->vfs_inode.i_data.writeback_index = 0; 693 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 694 INIT_LIST_HEAD(&ei->i_prealloc_list); 695 spin_lock_init(&ei->i_prealloc_lock); 696 /* 697 * Note: We can be called before EXT4_SB(sb)->s_journal is set, 698 * therefore it can be null here. Don't check it, just initialize 699 * jinode. 700 */ 701 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 702 ei->i_reserved_data_blocks = 0; 703 ei->i_reserved_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0; 705 ei->i_da_metadata_calc_len = 0; 706 ei->i_delalloc_reserved_flag = 0; 707 spin_lock_init(&(ei->i_block_reservation_lock)); 708 #ifdef CONFIG_QUOTA 709 ei->i_reserved_quota = 0; 710 #endif 711 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 712 ei->cur_aio_dio = NULL; 713 ei->i_sync_tid = 0; 714 ei->i_datasync_tid = 0; 715 716 return &ei->vfs_inode; 717 } 718 719 static void ext4_destroy_inode(struct inode *inode) 720 { 721 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 722 ext4_msg(inode->i_sb, KERN_ERR, 723 "Inode %lu (%p): orphan list check failed!", 724 inode->i_ino, EXT4_I(inode)); 725 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 726 EXT4_I(inode), sizeof(struct ext4_inode_info), 727 true); 728 dump_stack(); 729 } 730 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 731 } 732 733 static void init_once(void *foo) 734 { 735 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 736 737 INIT_LIST_HEAD(&ei->i_orphan); 738 #ifdef CONFIG_EXT4_FS_XATTR 739 init_rwsem(&ei->xattr_sem); 740 #endif 741 init_rwsem(&ei->i_data_sem); 742 inode_init_once(&ei->vfs_inode); 743 } 744 745 static int init_inodecache(void) 746 { 747 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 748 sizeof(struct ext4_inode_info), 749 0, (SLAB_RECLAIM_ACCOUNT| 750 SLAB_MEM_SPREAD), 751 init_once); 752 if (ext4_inode_cachep == NULL) 753 return -ENOMEM; 754 return 0; 755 } 756 757 static void destroy_inodecache(void) 758 { 759 kmem_cache_destroy(ext4_inode_cachep); 760 } 761 762 static void ext4_clear_inode(struct inode *inode) 763 { 764 ext4_discard_preallocations(inode); 765 if (EXT4_JOURNAL(inode)) 766 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 767 &EXT4_I(inode)->jinode); 768 } 769 770 static inline void ext4_show_quota_options(struct seq_file *seq, 771 struct super_block *sb) 772 { 773 #if defined(CONFIG_QUOTA) 774 struct ext4_sb_info *sbi = EXT4_SB(sb); 775 776 if (sbi->s_jquota_fmt) { 777 char *fmtname = ""; 778 779 switch (sbi->s_jquota_fmt) { 780 case QFMT_VFS_OLD: 781 fmtname = "vfsold"; 782 break; 783 case QFMT_VFS_V0: 784 fmtname = "vfsv0"; 785 break; 786 case QFMT_VFS_V1: 787 fmtname = "vfsv1"; 788 break; 789 } 790 seq_printf(seq, ",jqfmt=%s", fmtname); 791 } 792 793 if (sbi->s_qf_names[USRQUOTA]) 794 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 795 796 if (sbi->s_qf_names[GRPQUOTA]) 797 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 798 799 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 800 seq_puts(seq, ",usrquota"); 801 802 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 803 seq_puts(seq, ",grpquota"); 804 #endif 805 } 806 807 /* 808 * Show an option if 809 * - it's set to a non-default value OR 810 * - if the per-sb default is different from the global default 811 */ 812 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 813 { 814 int def_errors; 815 unsigned long def_mount_opts; 816 struct super_block *sb = vfs->mnt_sb; 817 struct ext4_sb_info *sbi = EXT4_SB(sb); 818 struct ext4_super_block *es = sbi->s_es; 819 820 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 821 def_errors = le16_to_cpu(es->s_errors); 822 823 if (sbi->s_sb_block != 1) 824 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 825 if (test_opt(sb, MINIX_DF)) 826 seq_puts(seq, ",minixdf"); 827 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 828 seq_puts(seq, ",grpid"); 829 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 830 seq_puts(seq, ",nogrpid"); 831 if (sbi->s_resuid != EXT4_DEF_RESUID || 832 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 833 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 834 } 835 if (sbi->s_resgid != EXT4_DEF_RESGID || 836 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 837 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 838 } 839 if (test_opt(sb, ERRORS_RO)) { 840 if (def_errors == EXT4_ERRORS_PANIC || 841 def_errors == EXT4_ERRORS_CONTINUE) { 842 seq_puts(seq, ",errors=remount-ro"); 843 } 844 } 845 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 846 seq_puts(seq, ",errors=continue"); 847 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 848 seq_puts(seq, ",errors=panic"); 849 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 850 seq_puts(seq, ",nouid32"); 851 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 852 seq_puts(seq, ",debug"); 853 if (test_opt(sb, OLDALLOC)) 854 seq_puts(seq, ",oldalloc"); 855 #ifdef CONFIG_EXT4_FS_XATTR 856 if (test_opt(sb, XATTR_USER) && 857 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 858 seq_puts(seq, ",user_xattr"); 859 if (!test_opt(sb, XATTR_USER) && 860 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 861 seq_puts(seq, ",nouser_xattr"); 862 } 863 #endif 864 #ifdef CONFIG_EXT4_FS_POSIX_ACL 865 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 866 seq_puts(seq, ",acl"); 867 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 868 seq_puts(seq, ",noacl"); 869 #endif 870 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 871 seq_printf(seq, ",commit=%u", 872 (unsigned) (sbi->s_commit_interval / HZ)); 873 } 874 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 875 seq_printf(seq, ",min_batch_time=%u", 876 (unsigned) sbi->s_min_batch_time); 877 } 878 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 879 seq_printf(seq, ",max_batch_time=%u", 880 (unsigned) sbi->s_min_batch_time); 881 } 882 883 /* 884 * We're changing the default of barrier mount option, so 885 * let's always display its mount state so it's clear what its 886 * status is. 887 */ 888 seq_puts(seq, ",barrier="); 889 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 890 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 891 seq_puts(seq, ",journal_async_commit"); 892 if (test_opt(sb, NOBH)) 893 seq_puts(seq, ",nobh"); 894 if (test_opt(sb, I_VERSION)) 895 seq_puts(seq, ",i_version"); 896 if (!test_opt(sb, DELALLOC)) 897 seq_puts(seq, ",nodelalloc"); 898 899 900 if (sbi->s_stripe) 901 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 902 /* 903 * journal mode get enabled in different ways 904 * So just print the value even if we didn't specify it 905 */ 906 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 907 seq_puts(seq, ",data=journal"); 908 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 909 seq_puts(seq, ",data=ordered"); 910 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 911 seq_puts(seq, ",data=writeback"); 912 913 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 914 seq_printf(seq, ",inode_readahead_blks=%u", 915 sbi->s_inode_readahead_blks); 916 917 if (test_opt(sb, DATA_ERR_ABORT)) 918 seq_puts(seq, ",data_err=abort"); 919 920 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 921 seq_puts(seq, ",noauto_da_alloc"); 922 923 if (test_opt(sb, DISCARD)) 924 seq_puts(seq, ",discard"); 925 926 if (test_opt(sb, NOLOAD)) 927 seq_puts(seq, ",norecovery"); 928 929 ext4_show_quota_options(seq, sb); 930 931 return 0; 932 } 933 934 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 935 u64 ino, u32 generation) 936 { 937 struct inode *inode; 938 939 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 940 return ERR_PTR(-ESTALE); 941 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 942 return ERR_PTR(-ESTALE); 943 944 /* iget isn't really right if the inode is currently unallocated!! 945 * 946 * ext4_read_inode will return a bad_inode if the inode had been 947 * deleted, so we should be safe. 948 * 949 * Currently we don't know the generation for parent directory, so 950 * a generation of 0 means "accept any" 951 */ 952 inode = ext4_iget(sb, ino); 953 if (IS_ERR(inode)) 954 return ERR_CAST(inode); 955 if (generation && inode->i_generation != generation) { 956 iput(inode); 957 return ERR_PTR(-ESTALE); 958 } 959 960 return inode; 961 } 962 963 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 964 int fh_len, int fh_type) 965 { 966 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 967 ext4_nfs_get_inode); 968 } 969 970 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 971 int fh_len, int fh_type) 972 { 973 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 974 ext4_nfs_get_inode); 975 } 976 977 /* 978 * Try to release metadata pages (indirect blocks, directories) which are 979 * mapped via the block device. Since these pages could have journal heads 980 * which would prevent try_to_free_buffers() from freeing them, we must use 981 * jbd2 layer's try_to_free_buffers() function to release them. 982 */ 983 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 984 gfp_t wait) 985 { 986 journal_t *journal = EXT4_SB(sb)->s_journal; 987 988 WARN_ON(PageChecked(page)); 989 if (!page_has_buffers(page)) 990 return 0; 991 if (journal) 992 return jbd2_journal_try_to_free_buffers(journal, page, 993 wait & ~__GFP_WAIT); 994 return try_to_free_buffers(page); 995 } 996 997 #ifdef CONFIG_QUOTA 998 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 999 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1000 1001 static int ext4_write_dquot(struct dquot *dquot); 1002 static int ext4_acquire_dquot(struct dquot *dquot); 1003 static int ext4_release_dquot(struct dquot *dquot); 1004 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1005 static int ext4_write_info(struct super_block *sb, int type); 1006 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1007 char *path, int remount); 1008 static int ext4_quota_on_mount(struct super_block *sb, int type); 1009 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1010 size_t len, loff_t off); 1011 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1012 const char *data, size_t len, loff_t off); 1013 1014 static const struct dquot_operations ext4_quota_operations = { 1015 .initialize = dquot_initialize, 1016 .drop = dquot_drop, 1017 .alloc_space = dquot_alloc_space, 1018 .reserve_space = dquot_reserve_space, 1019 .claim_space = dquot_claim_space, 1020 .release_rsv = dquot_release_reserved_space, 1021 #ifdef CONFIG_QUOTA 1022 .get_reserved_space = ext4_get_reserved_space, 1023 #endif 1024 .alloc_inode = dquot_alloc_inode, 1025 .free_space = dquot_free_space, 1026 .free_inode = dquot_free_inode, 1027 .transfer = dquot_transfer, 1028 .write_dquot = ext4_write_dquot, 1029 .acquire_dquot = ext4_acquire_dquot, 1030 .release_dquot = ext4_release_dquot, 1031 .mark_dirty = ext4_mark_dquot_dirty, 1032 .write_info = ext4_write_info, 1033 .alloc_dquot = dquot_alloc, 1034 .destroy_dquot = dquot_destroy, 1035 }; 1036 1037 static const struct quotactl_ops ext4_qctl_operations = { 1038 .quota_on = ext4_quota_on, 1039 .quota_off = vfs_quota_off, 1040 .quota_sync = vfs_quota_sync, 1041 .get_info = vfs_get_dqinfo, 1042 .set_info = vfs_set_dqinfo, 1043 .get_dqblk = vfs_get_dqblk, 1044 .set_dqblk = vfs_set_dqblk 1045 }; 1046 #endif 1047 1048 static const struct super_operations ext4_sops = { 1049 .alloc_inode = ext4_alloc_inode, 1050 .destroy_inode = ext4_destroy_inode, 1051 .write_inode = ext4_write_inode, 1052 .dirty_inode = ext4_dirty_inode, 1053 .delete_inode = ext4_delete_inode, 1054 .put_super = ext4_put_super, 1055 .sync_fs = ext4_sync_fs, 1056 .freeze_fs = ext4_freeze, 1057 .unfreeze_fs = ext4_unfreeze, 1058 .statfs = ext4_statfs, 1059 .remount_fs = ext4_remount, 1060 .clear_inode = ext4_clear_inode, 1061 .show_options = ext4_show_options, 1062 #ifdef CONFIG_QUOTA 1063 .quota_read = ext4_quota_read, 1064 .quota_write = ext4_quota_write, 1065 #endif 1066 .bdev_try_to_free_page = bdev_try_to_free_page, 1067 }; 1068 1069 static const struct super_operations ext4_nojournal_sops = { 1070 .alloc_inode = ext4_alloc_inode, 1071 .destroy_inode = ext4_destroy_inode, 1072 .write_inode = ext4_write_inode, 1073 .dirty_inode = ext4_dirty_inode, 1074 .delete_inode = ext4_delete_inode, 1075 .write_super = ext4_write_super, 1076 .put_super = ext4_put_super, 1077 .statfs = ext4_statfs, 1078 .remount_fs = ext4_remount, 1079 .clear_inode = ext4_clear_inode, 1080 .show_options = ext4_show_options, 1081 #ifdef CONFIG_QUOTA 1082 .quota_read = ext4_quota_read, 1083 .quota_write = ext4_quota_write, 1084 #endif 1085 .bdev_try_to_free_page = bdev_try_to_free_page, 1086 }; 1087 1088 static const struct export_operations ext4_export_ops = { 1089 .fh_to_dentry = ext4_fh_to_dentry, 1090 .fh_to_parent = ext4_fh_to_parent, 1091 .get_parent = ext4_get_parent, 1092 }; 1093 1094 enum { 1095 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1096 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1097 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1098 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1099 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1100 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1101 Opt_journal_update, Opt_journal_dev, 1102 Opt_journal_checksum, Opt_journal_async_commit, 1103 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1104 Opt_data_err_abort, Opt_data_err_ignore, 1105 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1106 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1107 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1108 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1109 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1110 Opt_block_validity, Opt_noblock_validity, 1111 Opt_inode_readahead_blks, Opt_journal_ioprio, 1112 Opt_discard, Opt_nodiscard, 1113 }; 1114 1115 static const match_table_t tokens = { 1116 {Opt_bsd_df, "bsddf"}, 1117 {Opt_minix_df, "minixdf"}, 1118 {Opt_grpid, "grpid"}, 1119 {Opt_grpid, "bsdgroups"}, 1120 {Opt_nogrpid, "nogrpid"}, 1121 {Opt_nogrpid, "sysvgroups"}, 1122 {Opt_resgid, "resgid=%u"}, 1123 {Opt_resuid, "resuid=%u"}, 1124 {Opt_sb, "sb=%u"}, 1125 {Opt_err_cont, "errors=continue"}, 1126 {Opt_err_panic, "errors=panic"}, 1127 {Opt_err_ro, "errors=remount-ro"}, 1128 {Opt_nouid32, "nouid32"}, 1129 {Opt_debug, "debug"}, 1130 {Opt_oldalloc, "oldalloc"}, 1131 {Opt_orlov, "orlov"}, 1132 {Opt_user_xattr, "user_xattr"}, 1133 {Opt_nouser_xattr, "nouser_xattr"}, 1134 {Opt_acl, "acl"}, 1135 {Opt_noacl, "noacl"}, 1136 {Opt_noload, "noload"}, 1137 {Opt_noload, "norecovery"}, 1138 {Opt_nobh, "nobh"}, 1139 {Opt_bh, "bh"}, 1140 {Opt_commit, "commit=%u"}, 1141 {Opt_min_batch_time, "min_batch_time=%u"}, 1142 {Opt_max_batch_time, "max_batch_time=%u"}, 1143 {Opt_journal_update, "journal=update"}, 1144 {Opt_journal_dev, "journal_dev=%u"}, 1145 {Opt_journal_checksum, "journal_checksum"}, 1146 {Opt_journal_async_commit, "journal_async_commit"}, 1147 {Opt_abort, "abort"}, 1148 {Opt_data_journal, "data=journal"}, 1149 {Opt_data_ordered, "data=ordered"}, 1150 {Opt_data_writeback, "data=writeback"}, 1151 {Opt_data_err_abort, "data_err=abort"}, 1152 {Opt_data_err_ignore, "data_err=ignore"}, 1153 {Opt_offusrjquota, "usrjquota="}, 1154 {Opt_usrjquota, "usrjquota=%s"}, 1155 {Opt_offgrpjquota, "grpjquota="}, 1156 {Opt_grpjquota, "grpjquota=%s"}, 1157 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1158 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1159 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1160 {Opt_grpquota, "grpquota"}, 1161 {Opt_noquota, "noquota"}, 1162 {Opt_quota, "quota"}, 1163 {Opt_usrquota, "usrquota"}, 1164 {Opt_barrier, "barrier=%u"}, 1165 {Opt_barrier, "barrier"}, 1166 {Opt_nobarrier, "nobarrier"}, 1167 {Opt_i_version, "i_version"}, 1168 {Opt_stripe, "stripe=%u"}, 1169 {Opt_resize, "resize"}, 1170 {Opt_delalloc, "delalloc"}, 1171 {Opt_nodelalloc, "nodelalloc"}, 1172 {Opt_block_validity, "block_validity"}, 1173 {Opt_noblock_validity, "noblock_validity"}, 1174 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1175 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1176 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1177 {Opt_auto_da_alloc, "auto_da_alloc"}, 1178 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1179 {Opt_discard, "discard"}, 1180 {Opt_nodiscard, "nodiscard"}, 1181 {Opt_err, NULL}, 1182 }; 1183 1184 static ext4_fsblk_t get_sb_block(void **data) 1185 { 1186 ext4_fsblk_t sb_block; 1187 char *options = (char *) *data; 1188 1189 if (!options || strncmp(options, "sb=", 3) != 0) 1190 return 1; /* Default location */ 1191 1192 options += 3; 1193 /* TODO: use simple_strtoll with >32bit ext4 */ 1194 sb_block = simple_strtoul(options, &options, 0); 1195 if (*options && *options != ',') { 1196 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1197 (char *) *data); 1198 return 1; 1199 } 1200 if (*options == ',') 1201 options++; 1202 *data = (void *) options; 1203 1204 return sb_block; 1205 } 1206 1207 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1208 1209 static int parse_options(char *options, struct super_block *sb, 1210 unsigned long *journal_devnum, 1211 unsigned int *journal_ioprio, 1212 ext4_fsblk_t *n_blocks_count, int is_remount) 1213 { 1214 struct ext4_sb_info *sbi = EXT4_SB(sb); 1215 char *p; 1216 substring_t args[MAX_OPT_ARGS]; 1217 int data_opt = 0; 1218 int option; 1219 #ifdef CONFIG_QUOTA 1220 int qtype, qfmt; 1221 char *qname; 1222 #endif 1223 1224 if (!options) 1225 return 1; 1226 1227 while ((p = strsep(&options, ",")) != NULL) { 1228 int token; 1229 if (!*p) 1230 continue; 1231 1232 token = match_token(p, tokens, args); 1233 switch (token) { 1234 case Opt_bsd_df: 1235 clear_opt(sbi->s_mount_opt, MINIX_DF); 1236 break; 1237 case Opt_minix_df: 1238 set_opt(sbi->s_mount_opt, MINIX_DF); 1239 break; 1240 case Opt_grpid: 1241 set_opt(sbi->s_mount_opt, GRPID); 1242 break; 1243 case Opt_nogrpid: 1244 clear_opt(sbi->s_mount_opt, GRPID); 1245 break; 1246 case Opt_resuid: 1247 if (match_int(&args[0], &option)) 1248 return 0; 1249 sbi->s_resuid = option; 1250 break; 1251 case Opt_resgid: 1252 if (match_int(&args[0], &option)) 1253 return 0; 1254 sbi->s_resgid = option; 1255 break; 1256 case Opt_sb: 1257 /* handled by get_sb_block() instead of here */ 1258 /* *sb_block = match_int(&args[0]); */ 1259 break; 1260 case Opt_err_panic: 1261 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1262 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1263 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1264 break; 1265 case Opt_err_ro: 1266 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1267 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1268 set_opt(sbi->s_mount_opt, ERRORS_RO); 1269 break; 1270 case Opt_err_cont: 1271 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1272 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1273 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1274 break; 1275 case Opt_nouid32: 1276 set_opt(sbi->s_mount_opt, NO_UID32); 1277 break; 1278 case Opt_debug: 1279 set_opt(sbi->s_mount_opt, DEBUG); 1280 break; 1281 case Opt_oldalloc: 1282 set_opt(sbi->s_mount_opt, OLDALLOC); 1283 break; 1284 case Opt_orlov: 1285 clear_opt(sbi->s_mount_opt, OLDALLOC); 1286 break; 1287 #ifdef CONFIG_EXT4_FS_XATTR 1288 case Opt_user_xattr: 1289 set_opt(sbi->s_mount_opt, XATTR_USER); 1290 break; 1291 case Opt_nouser_xattr: 1292 clear_opt(sbi->s_mount_opt, XATTR_USER); 1293 break; 1294 #else 1295 case Opt_user_xattr: 1296 case Opt_nouser_xattr: 1297 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1298 break; 1299 #endif 1300 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1301 case Opt_acl: 1302 set_opt(sbi->s_mount_opt, POSIX_ACL); 1303 break; 1304 case Opt_noacl: 1305 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1306 break; 1307 #else 1308 case Opt_acl: 1309 case Opt_noacl: 1310 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1311 break; 1312 #endif 1313 case Opt_journal_update: 1314 /* @@@ FIXME */ 1315 /* Eventually we will want to be able to create 1316 a journal file here. For now, only allow the 1317 user to specify an existing inode to be the 1318 journal file. */ 1319 if (is_remount) { 1320 ext4_msg(sb, KERN_ERR, 1321 "Cannot specify journal on remount"); 1322 return 0; 1323 } 1324 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1325 break; 1326 case Opt_journal_dev: 1327 if (is_remount) { 1328 ext4_msg(sb, KERN_ERR, 1329 "Cannot specify journal on remount"); 1330 return 0; 1331 } 1332 if (match_int(&args[0], &option)) 1333 return 0; 1334 *journal_devnum = option; 1335 break; 1336 case Opt_journal_checksum: 1337 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1338 break; 1339 case Opt_journal_async_commit: 1340 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1341 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1342 break; 1343 case Opt_noload: 1344 set_opt(sbi->s_mount_opt, NOLOAD); 1345 break; 1346 case Opt_commit: 1347 if (match_int(&args[0], &option)) 1348 return 0; 1349 if (option < 0) 1350 return 0; 1351 if (option == 0) 1352 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1353 sbi->s_commit_interval = HZ * option; 1354 break; 1355 case Opt_max_batch_time: 1356 if (match_int(&args[0], &option)) 1357 return 0; 1358 if (option < 0) 1359 return 0; 1360 if (option == 0) 1361 option = EXT4_DEF_MAX_BATCH_TIME; 1362 sbi->s_max_batch_time = option; 1363 break; 1364 case Opt_min_batch_time: 1365 if (match_int(&args[0], &option)) 1366 return 0; 1367 if (option < 0) 1368 return 0; 1369 sbi->s_min_batch_time = option; 1370 break; 1371 case Opt_data_journal: 1372 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1373 goto datacheck; 1374 case Opt_data_ordered: 1375 data_opt = EXT4_MOUNT_ORDERED_DATA; 1376 goto datacheck; 1377 case Opt_data_writeback: 1378 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1379 datacheck: 1380 if (is_remount) { 1381 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1382 != data_opt) { 1383 ext4_msg(sb, KERN_ERR, 1384 "Cannot change data mode on remount"); 1385 return 0; 1386 } 1387 } else { 1388 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1389 sbi->s_mount_opt |= data_opt; 1390 } 1391 break; 1392 case Opt_data_err_abort: 1393 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1394 break; 1395 case Opt_data_err_ignore: 1396 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1397 break; 1398 #ifdef CONFIG_QUOTA 1399 case Opt_usrjquota: 1400 qtype = USRQUOTA; 1401 goto set_qf_name; 1402 case Opt_grpjquota: 1403 qtype = GRPQUOTA; 1404 set_qf_name: 1405 if (sb_any_quota_loaded(sb) && 1406 !sbi->s_qf_names[qtype]) { 1407 ext4_msg(sb, KERN_ERR, 1408 "Cannot change journaled " 1409 "quota options when quota turned on"); 1410 return 0; 1411 } 1412 qname = match_strdup(&args[0]); 1413 if (!qname) { 1414 ext4_msg(sb, KERN_ERR, 1415 "Not enough memory for " 1416 "storing quotafile name"); 1417 return 0; 1418 } 1419 if (sbi->s_qf_names[qtype] && 1420 strcmp(sbi->s_qf_names[qtype], qname)) { 1421 ext4_msg(sb, KERN_ERR, 1422 "%s quota file already " 1423 "specified", QTYPE2NAME(qtype)); 1424 kfree(qname); 1425 return 0; 1426 } 1427 sbi->s_qf_names[qtype] = qname; 1428 if (strchr(sbi->s_qf_names[qtype], '/')) { 1429 ext4_msg(sb, KERN_ERR, 1430 "quotafile must be on " 1431 "filesystem root"); 1432 kfree(sbi->s_qf_names[qtype]); 1433 sbi->s_qf_names[qtype] = NULL; 1434 return 0; 1435 } 1436 set_opt(sbi->s_mount_opt, QUOTA); 1437 break; 1438 case Opt_offusrjquota: 1439 qtype = USRQUOTA; 1440 goto clear_qf_name; 1441 case Opt_offgrpjquota: 1442 qtype = GRPQUOTA; 1443 clear_qf_name: 1444 if (sb_any_quota_loaded(sb) && 1445 sbi->s_qf_names[qtype]) { 1446 ext4_msg(sb, KERN_ERR, "Cannot change " 1447 "journaled quota options when " 1448 "quota turned on"); 1449 return 0; 1450 } 1451 /* 1452 * The space will be released later when all options 1453 * are confirmed to be correct 1454 */ 1455 sbi->s_qf_names[qtype] = NULL; 1456 break; 1457 case Opt_jqfmt_vfsold: 1458 qfmt = QFMT_VFS_OLD; 1459 goto set_qf_format; 1460 case Opt_jqfmt_vfsv0: 1461 qfmt = QFMT_VFS_V0; 1462 goto set_qf_format; 1463 case Opt_jqfmt_vfsv1: 1464 qfmt = QFMT_VFS_V1; 1465 set_qf_format: 1466 if (sb_any_quota_loaded(sb) && 1467 sbi->s_jquota_fmt != qfmt) { 1468 ext4_msg(sb, KERN_ERR, "Cannot change " 1469 "journaled quota options when " 1470 "quota turned on"); 1471 return 0; 1472 } 1473 sbi->s_jquota_fmt = qfmt; 1474 break; 1475 case Opt_quota: 1476 case Opt_usrquota: 1477 set_opt(sbi->s_mount_opt, QUOTA); 1478 set_opt(sbi->s_mount_opt, USRQUOTA); 1479 break; 1480 case Opt_grpquota: 1481 set_opt(sbi->s_mount_opt, QUOTA); 1482 set_opt(sbi->s_mount_opt, GRPQUOTA); 1483 break; 1484 case Opt_noquota: 1485 if (sb_any_quota_loaded(sb)) { 1486 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1487 "options when quota turned on"); 1488 return 0; 1489 } 1490 clear_opt(sbi->s_mount_opt, QUOTA); 1491 clear_opt(sbi->s_mount_opt, USRQUOTA); 1492 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1493 break; 1494 #else 1495 case Opt_quota: 1496 case Opt_usrquota: 1497 case Opt_grpquota: 1498 ext4_msg(sb, KERN_ERR, 1499 "quota options not supported"); 1500 break; 1501 case Opt_usrjquota: 1502 case Opt_grpjquota: 1503 case Opt_offusrjquota: 1504 case Opt_offgrpjquota: 1505 case Opt_jqfmt_vfsold: 1506 case Opt_jqfmt_vfsv0: 1507 case Opt_jqfmt_vfsv1: 1508 ext4_msg(sb, KERN_ERR, 1509 "journaled quota options not supported"); 1510 break; 1511 case Opt_noquota: 1512 break; 1513 #endif 1514 case Opt_abort: 1515 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1516 break; 1517 case Opt_nobarrier: 1518 clear_opt(sbi->s_mount_opt, BARRIER); 1519 break; 1520 case Opt_barrier: 1521 if (match_int(&args[0], &option)) { 1522 set_opt(sbi->s_mount_opt, BARRIER); 1523 break; 1524 } 1525 if (option) 1526 set_opt(sbi->s_mount_opt, BARRIER); 1527 else 1528 clear_opt(sbi->s_mount_opt, BARRIER); 1529 break; 1530 case Opt_ignore: 1531 break; 1532 case Opt_resize: 1533 if (!is_remount) { 1534 ext4_msg(sb, KERN_ERR, 1535 "resize option only available " 1536 "for remount"); 1537 return 0; 1538 } 1539 if (match_int(&args[0], &option) != 0) 1540 return 0; 1541 *n_blocks_count = option; 1542 break; 1543 case Opt_nobh: 1544 set_opt(sbi->s_mount_opt, NOBH); 1545 break; 1546 case Opt_bh: 1547 clear_opt(sbi->s_mount_opt, NOBH); 1548 break; 1549 case Opt_i_version: 1550 set_opt(sbi->s_mount_opt, I_VERSION); 1551 sb->s_flags |= MS_I_VERSION; 1552 break; 1553 case Opt_nodelalloc: 1554 clear_opt(sbi->s_mount_opt, DELALLOC); 1555 break; 1556 case Opt_stripe: 1557 if (match_int(&args[0], &option)) 1558 return 0; 1559 if (option < 0) 1560 return 0; 1561 sbi->s_stripe = option; 1562 break; 1563 case Opt_delalloc: 1564 set_opt(sbi->s_mount_opt, DELALLOC); 1565 break; 1566 case Opt_block_validity: 1567 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1568 break; 1569 case Opt_noblock_validity: 1570 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1571 break; 1572 case Opt_inode_readahead_blks: 1573 if (match_int(&args[0], &option)) 1574 return 0; 1575 if (option < 0 || option > (1 << 30)) 1576 return 0; 1577 if (!is_power_of_2(option)) { 1578 ext4_msg(sb, KERN_ERR, 1579 "EXT4-fs: inode_readahead_blks" 1580 " must be a power of 2"); 1581 return 0; 1582 } 1583 sbi->s_inode_readahead_blks = option; 1584 break; 1585 case Opt_journal_ioprio: 1586 if (match_int(&args[0], &option)) 1587 return 0; 1588 if (option < 0 || option > 7) 1589 break; 1590 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1591 option); 1592 break; 1593 case Opt_noauto_da_alloc: 1594 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1595 break; 1596 case Opt_auto_da_alloc: 1597 if (match_int(&args[0], &option)) { 1598 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1599 break; 1600 } 1601 if (option) 1602 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1603 else 1604 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1605 break; 1606 case Opt_discard: 1607 set_opt(sbi->s_mount_opt, DISCARD); 1608 break; 1609 case Opt_nodiscard: 1610 clear_opt(sbi->s_mount_opt, DISCARD); 1611 break; 1612 default: 1613 ext4_msg(sb, KERN_ERR, 1614 "Unrecognized mount option \"%s\" " 1615 "or missing value", p); 1616 return 0; 1617 } 1618 } 1619 #ifdef CONFIG_QUOTA 1620 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1621 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1622 sbi->s_qf_names[USRQUOTA]) 1623 clear_opt(sbi->s_mount_opt, USRQUOTA); 1624 1625 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1626 sbi->s_qf_names[GRPQUOTA]) 1627 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1628 1629 if ((sbi->s_qf_names[USRQUOTA] && 1630 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1631 (sbi->s_qf_names[GRPQUOTA] && 1632 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1633 ext4_msg(sb, KERN_ERR, "old and new quota " 1634 "format mixing"); 1635 return 0; 1636 } 1637 1638 if (!sbi->s_jquota_fmt) { 1639 ext4_msg(sb, KERN_ERR, "journaled quota format " 1640 "not specified"); 1641 return 0; 1642 } 1643 } else { 1644 if (sbi->s_jquota_fmt) { 1645 ext4_msg(sb, KERN_ERR, "journaled quota format " 1646 "specified with no journaling " 1647 "enabled"); 1648 return 0; 1649 } 1650 } 1651 #endif 1652 return 1; 1653 } 1654 1655 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1656 int read_only) 1657 { 1658 struct ext4_sb_info *sbi = EXT4_SB(sb); 1659 int res = 0; 1660 1661 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1662 ext4_msg(sb, KERN_ERR, "revision level too high, " 1663 "forcing read-only mode"); 1664 res = MS_RDONLY; 1665 } 1666 if (read_only) 1667 return res; 1668 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1669 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1670 "running e2fsck is recommended"); 1671 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1672 ext4_msg(sb, KERN_WARNING, 1673 "warning: mounting fs with errors, " 1674 "running e2fsck is recommended"); 1675 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1676 le16_to_cpu(es->s_mnt_count) >= 1677 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1678 ext4_msg(sb, KERN_WARNING, 1679 "warning: maximal mount count reached, " 1680 "running e2fsck is recommended"); 1681 else if (le32_to_cpu(es->s_checkinterval) && 1682 (le32_to_cpu(es->s_lastcheck) + 1683 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1684 ext4_msg(sb, KERN_WARNING, 1685 "warning: checktime reached, " 1686 "running e2fsck is recommended"); 1687 if (!sbi->s_journal) 1688 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1689 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1690 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1691 le16_add_cpu(&es->s_mnt_count, 1); 1692 es->s_mtime = cpu_to_le32(get_seconds()); 1693 ext4_update_dynamic_rev(sb); 1694 if (sbi->s_journal) 1695 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1696 1697 ext4_commit_super(sb, 1); 1698 if (test_opt(sb, DEBUG)) 1699 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1700 "bpg=%lu, ipg=%lu, mo=%04x]\n", 1701 sb->s_blocksize, 1702 sbi->s_groups_count, 1703 EXT4_BLOCKS_PER_GROUP(sb), 1704 EXT4_INODES_PER_GROUP(sb), 1705 sbi->s_mount_opt); 1706 1707 return res; 1708 } 1709 1710 static int ext4_fill_flex_info(struct super_block *sb) 1711 { 1712 struct ext4_sb_info *sbi = EXT4_SB(sb); 1713 struct ext4_group_desc *gdp = NULL; 1714 ext4_group_t flex_group_count; 1715 ext4_group_t flex_group; 1716 int groups_per_flex = 0; 1717 size_t size; 1718 int i; 1719 1720 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1721 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1722 1723 if (groups_per_flex < 2) { 1724 sbi->s_log_groups_per_flex = 0; 1725 return 1; 1726 } 1727 1728 /* We allocate both existing and potentially added groups */ 1729 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1730 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1731 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1732 size = flex_group_count * sizeof(struct flex_groups); 1733 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1734 if (sbi->s_flex_groups == NULL) { 1735 sbi->s_flex_groups = vmalloc(size); 1736 if (sbi->s_flex_groups) 1737 memset(sbi->s_flex_groups, 0, size); 1738 } 1739 if (sbi->s_flex_groups == NULL) { 1740 ext4_msg(sb, KERN_ERR, "not enough memory for " 1741 "%u flex groups", flex_group_count); 1742 goto failed; 1743 } 1744 1745 for (i = 0; i < sbi->s_groups_count; i++) { 1746 gdp = ext4_get_group_desc(sb, i, NULL); 1747 1748 flex_group = ext4_flex_group(sbi, i); 1749 atomic_add(ext4_free_inodes_count(sb, gdp), 1750 &sbi->s_flex_groups[flex_group].free_inodes); 1751 atomic_add(ext4_free_blks_count(sb, gdp), 1752 &sbi->s_flex_groups[flex_group].free_blocks); 1753 atomic_add(ext4_used_dirs_count(sb, gdp), 1754 &sbi->s_flex_groups[flex_group].used_dirs); 1755 } 1756 1757 return 1; 1758 failed: 1759 return 0; 1760 } 1761 1762 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1763 struct ext4_group_desc *gdp) 1764 { 1765 __u16 crc = 0; 1766 1767 if (sbi->s_es->s_feature_ro_compat & 1768 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1769 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1770 __le32 le_group = cpu_to_le32(block_group); 1771 1772 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1773 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1774 crc = crc16(crc, (__u8 *)gdp, offset); 1775 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1776 /* for checksum of struct ext4_group_desc do the rest...*/ 1777 if ((sbi->s_es->s_feature_incompat & 1778 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1779 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1780 crc = crc16(crc, (__u8 *)gdp + offset, 1781 le16_to_cpu(sbi->s_es->s_desc_size) - 1782 offset); 1783 } 1784 1785 return cpu_to_le16(crc); 1786 } 1787 1788 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1789 struct ext4_group_desc *gdp) 1790 { 1791 if ((sbi->s_es->s_feature_ro_compat & 1792 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1793 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1794 return 0; 1795 1796 return 1; 1797 } 1798 1799 /* Called at mount-time, super-block is locked */ 1800 static int ext4_check_descriptors(struct super_block *sb) 1801 { 1802 struct ext4_sb_info *sbi = EXT4_SB(sb); 1803 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1804 ext4_fsblk_t last_block; 1805 ext4_fsblk_t block_bitmap; 1806 ext4_fsblk_t inode_bitmap; 1807 ext4_fsblk_t inode_table; 1808 int flexbg_flag = 0; 1809 ext4_group_t i; 1810 1811 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1812 flexbg_flag = 1; 1813 1814 ext4_debug("Checking group descriptors"); 1815 1816 for (i = 0; i < sbi->s_groups_count; i++) { 1817 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1818 1819 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1820 last_block = ext4_blocks_count(sbi->s_es) - 1; 1821 else 1822 last_block = first_block + 1823 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1824 1825 block_bitmap = ext4_block_bitmap(sb, gdp); 1826 if (block_bitmap < first_block || block_bitmap > last_block) { 1827 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1828 "Block bitmap for group %u not in group " 1829 "(block %llu)!", i, block_bitmap); 1830 return 0; 1831 } 1832 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1833 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1834 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1835 "Inode bitmap for group %u not in group " 1836 "(block %llu)!", i, inode_bitmap); 1837 return 0; 1838 } 1839 inode_table = ext4_inode_table(sb, gdp); 1840 if (inode_table < first_block || 1841 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1842 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1843 "Inode table for group %u not in group " 1844 "(block %llu)!", i, inode_table); 1845 return 0; 1846 } 1847 ext4_lock_group(sb, i); 1848 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1849 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1850 "Checksum for group %u failed (%u!=%u)", 1851 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1852 gdp)), le16_to_cpu(gdp->bg_checksum)); 1853 if (!(sb->s_flags & MS_RDONLY)) { 1854 ext4_unlock_group(sb, i); 1855 return 0; 1856 } 1857 } 1858 ext4_unlock_group(sb, i); 1859 if (!flexbg_flag) 1860 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1861 } 1862 1863 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1864 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 1865 return 1; 1866 } 1867 1868 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1869 * the superblock) which were deleted from all directories, but held open by 1870 * a process at the time of a crash. We walk the list and try to delete these 1871 * inodes at recovery time (only with a read-write filesystem). 1872 * 1873 * In order to keep the orphan inode chain consistent during traversal (in 1874 * case of crash during recovery), we link each inode into the superblock 1875 * orphan list_head and handle it the same way as an inode deletion during 1876 * normal operation (which journals the operations for us). 1877 * 1878 * We only do an iget() and an iput() on each inode, which is very safe if we 1879 * accidentally point at an in-use or already deleted inode. The worst that 1880 * can happen in this case is that we get a "bit already cleared" message from 1881 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1882 * e2fsck was run on this filesystem, and it must have already done the orphan 1883 * inode cleanup for us, so we can safely abort without any further action. 1884 */ 1885 static void ext4_orphan_cleanup(struct super_block *sb, 1886 struct ext4_super_block *es) 1887 { 1888 unsigned int s_flags = sb->s_flags; 1889 int nr_orphans = 0, nr_truncates = 0; 1890 #ifdef CONFIG_QUOTA 1891 int i; 1892 #endif 1893 if (!es->s_last_orphan) { 1894 jbd_debug(4, "no orphan inodes to clean up\n"); 1895 return; 1896 } 1897 1898 if (bdev_read_only(sb->s_bdev)) { 1899 ext4_msg(sb, KERN_ERR, "write access " 1900 "unavailable, skipping orphan cleanup"); 1901 return; 1902 } 1903 1904 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1905 if (es->s_last_orphan) 1906 jbd_debug(1, "Errors on filesystem, " 1907 "clearing orphan list.\n"); 1908 es->s_last_orphan = 0; 1909 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1910 return; 1911 } 1912 1913 if (s_flags & MS_RDONLY) { 1914 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1915 sb->s_flags &= ~MS_RDONLY; 1916 } 1917 #ifdef CONFIG_QUOTA 1918 /* Needed for iput() to work correctly and not trash data */ 1919 sb->s_flags |= MS_ACTIVE; 1920 /* Turn on quotas so that they are updated correctly */ 1921 for (i = 0; i < MAXQUOTAS; i++) { 1922 if (EXT4_SB(sb)->s_qf_names[i]) { 1923 int ret = ext4_quota_on_mount(sb, i); 1924 if (ret < 0) 1925 ext4_msg(sb, KERN_ERR, 1926 "Cannot turn on journaled " 1927 "quota: error %d", ret); 1928 } 1929 } 1930 #endif 1931 1932 while (es->s_last_orphan) { 1933 struct inode *inode; 1934 1935 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1936 if (IS_ERR(inode)) { 1937 es->s_last_orphan = 0; 1938 break; 1939 } 1940 1941 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1942 vfs_dq_init(inode); 1943 if (inode->i_nlink) { 1944 ext4_msg(sb, KERN_DEBUG, 1945 "%s: truncating inode %lu to %lld bytes", 1946 __func__, inode->i_ino, inode->i_size); 1947 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1948 inode->i_ino, inode->i_size); 1949 ext4_truncate(inode); 1950 nr_truncates++; 1951 } else { 1952 ext4_msg(sb, KERN_DEBUG, 1953 "%s: deleting unreferenced inode %lu", 1954 __func__, inode->i_ino); 1955 jbd_debug(2, "deleting unreferenced inode %lu\n", 1956 inode->i_ino); 1957 nr_orphans++; 1958 } 1959 iput(inode); /* The delete magic happens here! */ 1960 } 1961 1962 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1963 1964 if (nr_orphans) 1965 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 1966 PLURAL(nr_orphans)); 1967 if (nr_truncates) 1968 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 1969 PLURAL(nr_truncates)); 1970 #ifdef CONFIG_QUOTA 1971 /* Turn quotas off */ 1972 for (i = 0; i < MAXQUOTAS; i++) { 1973 if (sb_dqopt(sb)->files[i]) 1974 vfs_quota_off(sb, i, 0); 1975 } 1976 #endif 1977 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1978 } 1979 1980 /* 1981 * Maximal extent format file size. 1982 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1983 * extent format containers, within a sector_t, and within i_blocks 1984 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 1985 * so that won't be a limiting factor. 1986 * 1987 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 1988 */ 1989 static loff_t ext4_max_size(int blkbits, int has_huge_files) 1990 { 1991 loff_t res; 1992 loff_t upper_limit = MAX_LFS_FILESIZE; 1993 1994 /* small i_blocks in vfs inode? */ 1995 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1996 /* 1997 * CONFIG_LBDAF is not enabled implies the inode 1998 * i_block represent total blocks in 512 bytes 1999 * 32 == size of vfs inode i_blocks * 8 2000 */ 2001 upper_limit = (1LL << 32) - 1; 2002 2003 /* total blocks in file system block size */ 2004 upper_limit >>= (blkbits - 9); 2005 upper_limit <<= blkbits; 2006 } 2007 2008 /* 32-bit extent-start container, ee_block */ 2009 res = 1LL << 32; 2010 res <<= blkbits; 2011 res -= 1; 2012 2013 /* Sanity check against vm- & vfs- imposed limits */ 2014 if (res > upper_limit) 2015 res = upper_limit; 2016 2017 return res; 2018 } 2019 2020 /* 2021 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2022 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2023 * We need to be 1 filesystem block less than the 2^48 sector limit. 2024 */ 2025 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2026 { 2027 loff_t res = EXT4_NDIR_BLOCKS; 2028 int meta_blocks; 2029 loff_t upper_limit; 2030 /* This is calculated to be the largest file size for a dense, block 2031 * mapped file such that the file's total number of 512-byte sectors, 2032 * including data and all indirect blocks, does not exceed (2^48 - 1). 2033 * 2034 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2035 * number of 512-byte sectors of the file. 2036 */ 2037 2038 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2039 /* 2040 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2041 * the inode i_block field represents total file blocks in 2042 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2043 */ 2044 upper_limit = (1LL << 32) - 1; 2045 2046 /* total blocks in file system block size */ 2047 upper_limit >>= (bits - 9); 2048 2049 } else { 2050 /* 2051 * We use 48 bit ext4_inode i_blocks 2052 * With EXT4_HUGE_FILE_FL set the i_blocks 2053 * represent total number of blocks in 2054 * file system block size 2055 */ 2056 upper_limit = (1LL << 48) - 1; 2057 2058 } 2059 2060 /* indirect blocks */ 2061 meta_blocks = 1; 2062 /* double indirect blocks */ 2063 meta_blocks += 1 + (1LL << (bits-2)); 2064 /* tripple indirect blocks */ 2065 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2066 2067 upper_limit -= meta_blocks; 2068 upper_limit <<= bits; 2069 2070 res += 1LL << (bits-2); 2071 res += 1LL << (2*(bits-2)); 2072 res += 1LL << (3*(bits-2)); 2073 res <<= bits; 2074 if (res > upper_limit) 2075 res = upper_limit; 2076 2077 if (res > MAX_LFS_FILESIZE) 2078 res = MAX_LFS_FILESIZE; 2079 2080 return res; 2081 } 2082 2083 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2084 ext4_fsblk_t logical_sb_block, int nr) 2085 { 2086 struct ext4_sb_info *sbi = EXT4_SB(sb); 2087 ext4_group_t bg, first_meta_bg; 2088 int has_super = 0; 2089 2090 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2091 2092 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2093 nr < first_meta_bg) 2094 return logical_sb_block + nr + 1; 2095 bg = sbi->s_desc_per_block * nr; 2096 if (ext4_bg_has_super(sb, bg)) 2097 has_super = 1; 2098 2099 return (has_super + ext4_group_first_block_no(sb, bg)); 2100 } 2101 2102 /** 2103 * ext4_get_stripe_size: Get the stripe size. 2104 * @sbi: In memory super block info 2105 * 2106 * If we have specified it via mount option, then 2107 * use the mount option value. If the value specified at mount time is 2108 * greater than the blocks per group use the super block value. 2109 * If the super block value is greater than blocks per group return 0. 2110 * Allocator needs it be less than blocks per group. 2111 * 2112 */ 2113 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2114 { 2115 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2116 unsigned long stripe_width = 2117 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2118 2119 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2120 return sbi->s_stripe; 2121 2122 if (stripe_width <= sbi->s_blocks_per_group) 2123 return stripe_width; 2124 2125 if (stride <= sbi->s_blocks_per_group) 2126 return stride; 2127 2128 return 0; 2129 } 2130 2131 /* sysfs supprt */ 2132 2133 struct ext4_attr { 2134 struct attribute attr; 2135 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2136 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2137 const char *, size_t); 2138 int offset; 2139 }; 2140 2141 static int parse_strtoul(const char *buf, 2142 unsigned long max, unsigned long *value) 2143 { 2144 char *endp; 2145 2146 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2147 endp = skip_spaces(endp); 2148 if (*endp || *value > max) 2149 return -EINVAL; 2150 2151 return 0; 2152 } 2153 2154 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2155 struct ext4_sb_info *sbi, 2156 char *buf) 2157 { 2158 return snprintf(buf, PAGE_SIZE, "%llu\n", 2159 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2160 } 2161 2162 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2163 struct ext4_sb_info *sbi, char *buf) 2164 { 2165 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2166 2167 return snprintf(buf, PAGE_SIZE, "%lu\n", 2168 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2169 sbi->s_sectors_written_start) >> 1); 2170 } 2171 2172 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2173 struct ext4_sb_info *sbi, char *buf) 2174 { 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2176 2177 return snprintf(buf, PAGE_SIZE, "%llu\n", 2178 (unsigned long long)(sbi->s_kbytes_written + 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2181 } 2182 2183 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2184 struct ext4_sb_info *sbi, 2185 const char *buf, size_t count) 2186 { 2187 unsigned long t; 2188 2189 if (parse_strtoul(buf, 0x40000000, &t)) 2190 return -EINVAL; 2191 2192 if (!is_power_of_2(t)) 2193 return -EINVAL; 2194 2195 sbi->s_inode_readahead_blks = t; 2196 return count; 2197 } 2198 2199 static ssize_t sbi_ui_show(struct ext4_attr *a, 2200 struct ext4_sb_info *sbi, char *buf) 2201 { 2202 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2203 2204 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2205 } 2206 2207 static ssize_t sbi_ui_store(struct ext4_attr *a, 2208 struct ext4_sb_info *sbi, 2209 const char *buf, size_t count) 2210 { 2211 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2212 unsigned long t; 2213 2214 if (parse_strtoul(buf, 0xffffffff, &t)) 2215 return -EINVAL; 2216 *ui = t; 2217 return count; 2218 } 2219 2220 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2221 static struct ext4_attr ext4_attr_##_name = { \ 2222 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2223 .show = _show, \ 2224 .store = _store, \ 2225 .offset = offsetof(struct ext4_sb_info, _elname), \ 2226 } 2227 #define EXT4_ATTR(name, mode, show, store) \ 2228 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2229 2230 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2231 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2232 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2233 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2234 #define ATTR_LIST(name) &ext4_attr_##name.attr 2235 2236 EXT4_RO_ATTR(delayed_allocation_blocks); 2237 EXT4_RO_ATTR(session_write_kbytes); 2238 EXT4_RO_ATTR(lifetime_write_kbytes); 2239 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2240 inode_readahead_blks_store, s_inode_readahead_blks); 2241 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2242 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2243 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2244 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2245 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2246 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2247 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2248 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2249 2250 static struct attribute *ext4_attrs[] = { 2251 ATTR_LIST(delayed_allocation_blocks), 2252 ATTR_LIST(session_write_kbytes), 2253 ATTR_LIST(lifetime_write_kbytes), 2254 ATTR_LIST(inode_readahead_blks), 2255 ATTR_LIST(inode_goal), 2256 ATTR_LIST(mb_stats), 2257 ATTR_LIST(mb_max_to_scan), 2258 ATTR_LIST(mb_min_to_scan), 2259 ATTR_LIST(mb_order2_req), 2260 ATTR_LIST(mb_stream_req), 2261 ATTR_LIST(mb_group_prealloc), 2262 ATTR_LIST(max_writeback_mb_bump), 2263 NULL, 2264 }; 2265 2266 static ssize_t ext4_attr_show(struct kobject *kobj, 2267 struct attribute *attr, char *buf) 2268 { 2269 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2270 s_kobj); 2271 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2272 2273 return a->show ? a->show(a, sbi, buf) : 0; 2274 } 2275 2276 static ssize_t ext4_attr_store(struct kobject *kobj, 2277 struct attribute *attr, 2278 const char *buf, size_t len) 2279 { 2280 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2281 s_kobj); 2282 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2283 2284 return a->store ? a->store(a, sbi, buf, len) : 0; 2285 } 2286 2287 static void ext4_sb_release(struct kobject *kobj) 2288 { 2289 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2290 s_kobj); 2291 complete(&sbi->s_kobj_unregister); 2292 } 2293 2294 2295 static struct sysfs_ops ext4_attr_ops = { 2296 .show = ext4_attr_show, 2297 .store = ext4_attr_store, 2298 }; 2299 2300 static struct kobj_type ext4_ktype = { 2301 .default_attrs = ext4_attrs, 2302 .sysfs_ops = &ext4_attr_ops, 2303 .release = ext4_sb_release, 2304 }; 2305 2306 /* 2307 * Check whether this filesystem can be mounted based on 2308 * the features present and the RDONLY/RDWR mount requested. 2309 * Returns 1 if this filesystem can be mounted as requested, 2310 * 0 if it cannot be. 2311 */ 2312 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2313 { 2314 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2315 ext4_msg(sb, KERN_ERR, 2316 "Couldn't mount because of " 2317 "unsupported optional features (%x)", 2318 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2319 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2320 return 0; 2321 } 2322 2323 if (readonly) 2324 return 1; 2325 2326 /* Check that feature set is OK for a read-write mount */ 2327 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2328 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2329 "unsupported optional features (%x)", 2330 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2331 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2332 return 0; 2333 } 2334 /* 2335 * Large file size enabled file system can only be mounted 2336 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2337 */ 2338 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2339 if (sizeof(blkcnt_t) < sizeof(u64)) { 2340 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2341 "cannot be mounted RDWR without " 2342 "CONFIG_LBDAF"); 2343 return 0; 2344 } 2345 } 2346 return 1; 2347 } 2348 2349 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2350 __releases(kernel_lock) 2351 __acquires(kernel_lock) 2352 { 2353 struct buffer_head *bh; 2354 struct ext4_super_block *es = NULL; 2355 struct ext4_sb_info *sbi; 2356 ext4_fsblk_t block; 2357 ext4_fsblk_t sb_block = get_sb_block(&data); 2358 ext4_fsblk_t logical_sb_block; 2359 unsigned long offset = 0; 2360 unsigned long journal_devnum = 0; 2361 unsigned long def_mount_opts; 2362 struct inode *root; 2363 char *cp; 2364 const char *descr; 2365 int ret = -EINVAL; 2366 int blocksize; 2367 unsigned int db_count; 2368 unsigned int i; 2369 int needs_recovery, has_huge_files; 2370 __u64 blocks_count; 2371 int err; 2372 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2373 2374 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2375 if (!sbi) 2376 return -ENOMEM; 2377 2378 sbi->s_blockgroup_lock = 2379 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2380 if (!sbi->s_blockgroup_lock) { 2381 kfree(sbi); 2382 return -ENOMEM; 2383 } 2384 sb->s_fs_info = sbi; 2385 sbi->s_mount_opt = 0; 2386 sbi->s_resuid = EXT4_DEF_RESUID; 2387 sbi->s_resgid = EXT4_DEF_RESGID; 2388 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2389 sbi->s_sb_block = sb_block; 2390 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2391 sectors[1]); 2392 2393 unlock_kernel(); 2394 2395 /* Cleanup superblock name */ 2396 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2397 *cp = '!'; 2398 2399 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2400 if (!blocksize) { 2401 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2402 goto out_fail; 2403 } 2404 2405 /* 2406 * The ext4 superblock will not be buffer aligned for other than 1kB 2407 * block sizes. We need to calculate the offset from buffer start. 2408 */ 2409 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2410 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2411 offset = do_div(logical_sb_block, blocksize); 2412 } else { 2413 logical_sb_block = sb_block; 2414 } 2415 2416 if (!(bh = sb_bread(sb, logical_sb_block))) { 2417 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 2418 goto out_fail; 2419 } 2420 /* 2421 * Note: s_es must be initialized as soon as possible because 2422 * some ext4 macro-instructions depend on its value 2423 */ 2424 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2425 sbi->s_es = es; 2426 sb->s_magic = le16_to_cpu(es->s_magic); 2427 if (sb->s_magic != EXT4_SUPER_MAGIC) 2428 goto cantfind_ext4; 2429 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 2430 2431 /* Set defaults before we parse the mount options */ 2432 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2433 if (def_mount_opts & EXT4_DEFM_DEBUG) 2434 set_opt(sbi->s_mount_opt, DEBUG); 2435 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 2436 set_opt(sbi->s_mount_opt, GRPID); 2437 if (def_mount_opts & EXT4_DEFM_UID16) 2438 set_opt(sbi->s_mount_opt, NO_UID32); 2439 #ifdef CONFIG_EXT4_FS_XATTR 2440 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2441 set_opt(sbi->s_mount_opt, XATTR_USER); 2442 #endif 2443 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2444 if (def_mount_opts & EXT4_DEFM_ACL) 2445 set_opt(sbi->s_mount_opt, POSIX_ACL); 2446 #endif 2447 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2448 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 2449 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2450 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 2451 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2452 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 2453 2454 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2455 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2456 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2457 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2458 else 2459 set_opt(sbi->s_mount_opt, ERRORS_RO); 2460 2461 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2462 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2463 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2464 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2465 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2466 2467 set_opt(sbi->s_mount_opt, BARRIER); 2468 2469 /* 2470 * enable delayed allocation by default 2471 * Use -o nodelalloc to turn it off 2472 */ 2473 set_opt(sbi->s_mount_opt, DELALLOC); 2474 2475 if (!parse_options((char *) data, sb, &journal_devnum, 2476 &journal_ioprio, NULL, 0)) 2477 goto failed_mount; 2478 2479 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2480 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2481 2482 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2483 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2484 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2485 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2486 ext4_msg(sb, KERN_WARNING, 2487 "feature flags set on rev 0 fs, " 2488 "running e2fsck is recommended"); 2489 2490 /* 2491 * Check feature flags regardless of the revision level, since we 2492 * previously didn't change the revision level when setting the flags, 2493 * so there is a chance incompat flags are set on a rev 0 filesystem. 2494 */ 2495 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 2496 goto failed_mount; 2497 2498 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2499 2500 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2501 blocksize > EXT4_MAX_BLOCK_SIZE) { 2502 ext4_msg(sb, KERN_ERR, 2503 "Unsupported filesystem blocksize %d", blocksize); 2504 goto failed_mount; 2505 } 2506 2507 if (sb->s_blocksize != blocksize) { 2508 /* Validate the filesystem blocksize */ 2509 if (!sb_set_blocksize(sb, blocksize)) { 2510 ext4_msg(sb, KERN_ERR, "bad block size %d", 2511 blocksize); 2512 goto failed_mount; 2513 } 2514 2515 brelse(bh); 2516 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2517 offset = do_div(logical_sb_block, blocksize); 2518 bh = sb_bread(sb, logical_sb_block); 2519 if (!bh) { 2520 ext4_msg(sb, KERN_ERR, 2521 "Can't read superblock on 2nd try"); 2522 goto failed_mount; 2523 } 2524 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2525 sbi->s_es = es; 2526 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2527 ext4_msg(sb, KERN_ERR, 2528 "Magic mismatch, very weird!"); 2529 goto failed_mount; 2530 } 2531 } 2532 2533 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2534 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2535 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2536 has_huge_files); 2537 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2538 2539 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2540 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2541 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2542 } else { 2543 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2544 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2545 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2546 (!is_power_of_2(sbi->s_inode_size)) || 2547 (sbi->s_inode_size > blocksize)) { 2548 ext4_msg(sb, KERN_ERR, 2549 "unsupported inode size: %d", 2550 sbi->s_inode_size); 2551 goto failed_mount; 2552 } 2553 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2554 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2555 } 2556 2557 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2558 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2559 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2560 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2561 !is_power_of_2(sbi->s_desc_size)) { 2562 ext4_msg(sb, KERN_ERR, 2563 "unsupported descriptor size %lu", 2564 sbi->s_desc_size); 2565 goto failed_mount; 2566 } 2567 } else 2568 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2569 2570 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2571 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2572 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2573 goto cantfind_ext4; 2574 2575 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2576 if (sbi->s_inodes_per_block == 0) 2577 goto cantfind_ext4; 2578 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2579 sbi->s_inodes_per_block; 2580 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2581 sbi->s_sbh = bh; 2582 sbi->s_mount_state = le16_to_cpu(es->s_state); 2583 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2584 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2585 2586 for (i = 0; i < 4; i++) 2587 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2588 sbi->s_def_hash_version = es->s_def_hash_version; 2589 i = le32_to_cpu(es->s_flags); 2590 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2591 sbi->s_hash_unsigned = 3; 2592 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2593 #ifdef __CHAR_UNSIGNED__ 2594 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2595 sbi->s_hash_unsigned = 3; 2596 #else 2597 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2598 #endif 2599 sb->s_dirt = 1; 2600 } 2601 2602 if (sbi->s_blocks_per_group > blocksize * 8) { 2603 ext4_msg(sb, KERN_ERR, 2604 "#blocks per group too big: %lu", 2605 sbi->s_blocks_per_group); 2606 goto failed_mount; 2607 } 2608 if (sbi->s_inodes_per_group > blocksize * 8) { 2609 ext4_msg(sb, KERN_ERR, 2610 "#inodes per group too big: %lu", 2611 sbi->s_inodes_per_group); 2612 goto failed_mount; 2613 } 2614 2615 /* 2616 * Test whether we have more sectors than will fit in sector_t, 2617 * and whether the max offset is addressable by the page cache. 2618 */ 2619 if ((ext4_blocks_count(es) > 2620 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || 2621 (ext4_blocks_count(es) > 2622 (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { 2623 ext4_msg(sb, KERN_ERR, "filesystem" 2624 " too large to mount safely on this system"); 2625 if (sizeof(sector_t) < 8) 2626 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 2627 ret = -EFBIG; 2628 goto failed_mount; 2629 } 2630 2631 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2632 goto cantfind_ext4; 2633 2634 /* check blocks count against device size */ 2635 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2636 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2637 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 2638 "exceeds size of device (%llu blocks)", 2639 ext4_blocks_count(es), blocks_count); 2640 goto failed_mount; 2641 } 2642 2643 /* 2644 * It makes no sense for the first data block to be beyond the end 2645 * of the filesystem. 2646 */ 2647 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2648 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 2649 "block %u is beyond end of filesystem (%llu)", 2650 le32_to_cpu(es->s_first_data_block), 2651 ext4_blocks_count(es)); 2652 goto failed_mount; 2653 } 2654 blocks_count = (ext4_blocks_count(es) - 2655 le32_to_cpu(es->s_first_data_block) + 2656 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2657 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2658 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2659 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 2660 "(block count %llu, first data block %u, " 2661 "blocks per group %lu)", sbi->s_groups_count, 2662 ext4_blocks_count(es), 2663 le32_to_cpu(es->s_first_data_block), 2664 EXT4_BLOCKS_PER_GROUP(sb)); 2665 goto failed_mount; 2666 } 2667 sbi->s_groups_count = blocks_count; 2668 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 2669 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 2670 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2671 EXT4_DESC_PER_BLOCK(sb); 2672 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2673 GFP_KERNEL); 2674 if (sbi->s_group_desc == NULL) { 2675 ext4_msg(sb, KERN_ERR, "not enough memory"); 2676 goto failed_mount; 2677 } 2678 2679 #ifdef CONFIG_PROC_FS 2680 if (ext4_proc_root) 2681 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2682 #endif 2683 2684 bgl_lock_init(sbi->s_blockgroup_lock); 2685 2686 for (i = 0; i < db_count; i++) { 2687 block = descriptor_loc(sb, logical_sb_block, i); 2688 sbi->s_group_desc[i] = sb_bread(sb, block); 2689 if (!sbi->s_group_desc[i]) { 2690 ext4_msg(sb, KERN_ERR, 2691 "can't read group descriptor %d", i); 2692 db_count = i; 2693 goto failed_mount2; 2694 } 2695 } 2696 if (!ext4_check_descriptors(sb)) { 2697 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 2698 goto failed_mount2; 2699 } 2700 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2701 if (!ext4_fill_flex_info(sb)) { 2702 ext4_msg(sb, KERN_ERR, 2703 "unable to initialize " 2704 "flex_bg meta info!"); 2705 goto failed_mount2; 2706 } 2707 2708 sbi->s_gdb_count = db_count; 2709 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2710 spin_lock_init(&sbi->s_next_gen_lock); 2711 2712 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2713 ext4_count_free_blocks(sb)); 2714 if (!err) { 2715 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2716 ext4_count_free_inodes(sb)); 2717 } 2718 if (!err) { 2719 err = percpu_counter_init(&sbi->s_dirs_counter, 2720 ext4_count_dirs(sb)); 2721 } 2722 if (!err) { 2723 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2724 } 2725 if (err) { 2726 ext4_msg(sb, KERN_ERR, "insufficient memory"); 2727 goto failed_mount3; 2728 } 2729 2730 sbi->s_stripe = ext4_get_stripe_size(sbi); 2731 sbi->s_max_writeback_mb_bump = 128; 2732 2733 /* 2734 * set up enough so that it can read an inode 2735 */ 2736 if (!test_opt(sb, NOLOAD) && 2737 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 2738 sb->s_op = &ext4_sops; 2739 else 2740 sb->s_op = &ext4_nojournal_sops; 2741 sb->s_export_op = &ext4_export_ops; 2742 sb->s_xattr = ext4_xattr_handlers; 2743 #ifdef CONFIG_QUOTA 2744 sb->s_qcop = &ext4_qctl_operations; 2745 sb->dq_op = &ext4_quota_operations; 2746 #endif 2747 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2748 mutex_init(&sbi->s_orphan_lock); 2749 mutex_init(&sbi->s_resize_lock); 2750 2751 sb->s_root = NULL; 2752 2753 needs_recovery = (es->s_last_orphan != 0 || 2754 EXT4_HAS_INCOMPAT_FEATURE(sb, 2755 EXT4_FEATURE_INCOMPAT_RECOVER)); 2756 2757 /* 2758 * The first inode we look at is the journal inode. Don't try 2759 * root first: it may be modified in the journal! 2760 */ 2761 if (!test_opt(sb, NOLOAD) && 2762 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2763 if (ext4_load_journal(sb, es, journal_devnum)) 2764 goto failed_mount3; 2765 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2766 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2767 ext4_msg(sb, KERN_ERR, "required journal recovery " 2768 "suppressed and not mounted read-only"); 2769 goto failed_mount4; 2770 } else { 2771 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2772 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2773 sbi->s_journal = NULL; 2774 needs_recovery = 0; 2775 goto no_journal; 2776 } 2777 2778 if (ext4_blocks_count(es) > 0xffffffffULL && 2779 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2780 JBD2_FEATURE_INCOMPAT_64BIT)) { 2781 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2782 goto failed_mount4; 2783 } 2784 2785 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2786 jbd2_journal_set_features(sbi->s_journal, 2787 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2788 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2789 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2790 jbd2_journal_set_features(sbi->s_journal, 2791 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2792 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2793 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2794 } else { 2795 jbd2_journal_clear_features(sbi->s_journal, 2796 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2797 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2798 } 2799 2800 /* We have now updated the journal if required, so we can 2801 * validate the data journaling mode. */ 2802 switch (test_opt(sb, DATA_FLAGS)) { 2803 case 0: 2804 /* No mode set, assume a default based on the journal 2805 * capabilities: ORDERED_DATA if the journal can 2806 * cope, else JOURNAL_DATA 2807 */ 2808 if (jbd2_journal_check_available_features 2809 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2810 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2811 else 2812 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2813 break; 2814 2815 case EXT4_MOUNT_ORDERED_DATA: 2816 case EXT4_MOUNT_WRITEBACK_DATA: 2817 if (!jbd2_journal_check_available_features 2818 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2819 ext4_msg(sb, KERN_ERR, "Journal does not support " 2820 "requested data journaling mode"); 2821 goto failed_mount4; 2822 } 2823 default: 2824 break; 2825 } 2826 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2827 2828 no_journal: 2829 2830 if (test_opt(sb, NOBH)) { 2831 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2832 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2833 "its supported only with writeback mode"); 2834 clear_opt(sbi->s_mount_opt, NOBH); 2835 } 2836 } 2837 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2838 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2839 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 2840 goto failed_mount_wq; 2841 } 2842 2843 /* 2844 * The jbd2_journal_load will have done any necessary log recovery, 2845 * so we can safely mount the rest of the filesystem now. 2846 */ 2847 2848 root = ext4_iget(sb, EXT4_ROOT_INO); 2849 if (IS_ERR(root)) { 2850 ext4_msg(sb, KERN_ERR, "get root inode failed"); 2851 ret = PTR_ERR(root); 2852 goto failed_mount4; 2853 } 2854 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2855 iput(root); 2856 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 2857 goto failed_mount4; 2858 } 2859 sb->s_root = d_alloc_root(root); 2860 if (!sb->s_root) { 2861 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 2862 iput(root); 2863 ret = -ENOMEM; 2864 goto failed_mount4; 2865 } 2866 2867 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2868 2869 /* determine the minimum size of new large inodes, if present */ 2870 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2871 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2872 EXT4_GOOD_OLD_INODE_SIZE; 2873 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2874 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2875 if (sbi->s_want_extra_isize < 2876 le16_to_cpu(es->s_want_extra_isize)) 2877 sbi->s_want_extra_isize = 2878 le16_to_cpu(es->s_want_extra_isize); 2879 if (sbi->s_want_extra_isize < 2880 le16_to_cpu(es->s_min_extra_isize)) 2881 sbi->s_want_extra_isize = 2882 le16_to_cpu(es->s_min_extra_isize); 2883 } 2884 } 2885 /* Check if enough inode space is available */ 2886 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2887 sbi->s_inode_size) { 2888 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2889 EXT4_GOOD_OLD_INODE_SIZE; 2890 ext4_msg(sb, KERN_INFO, "required extra inode space not" 2891 "available"); 2892 } 2893 2894 if (test_opt(sb, DELALLOC) && 2895 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 2896 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 2897 "requested data journaling mode"); 2898 clear_opt(sbi->s_mount_opt, DELALLOC); 2899 } 2900 2901 err = ext4_setup_system_zone(sb); 2902 if (err) { 2903 ext4_msg(sb, KERN_ERR, "failed to initialize system " 2904 "zone (%d)\n", err); 2905 goto failed_mount4; 2906 } 2907 2908 ext4_ext_init(sb); 2909 err = ext4_mb_init(sb, needs_recovery); 2910 if (err) { 2911 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 2912 err); 2913 goto failed_mount4; 2914 } 2915 2916 sbi->s_kobj.kset = ext4_kset; 2917 init_completion(&sbi->s_kobj_unregister); 2918 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 2919 "%s", sb->s_id); 2920 if (err) { 2921 ext4_mb_release(sb); 2922 ext4_ext_release(sb); 2923 goto failed_mount4; 2924 }; 2925 2926 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2927 ext4_orphan_cleanup(sb, es); 2928 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2929 if (needs_recovery) { 2930 ext4_msg(sb, KERN_INFO, "recovery complete"); 2931 ext4_mark_recovery_complete(sb, es); 2932 } 2933 if (EXT4_SB(sb)->s_journal) { 2934 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 2935 descr = " journalled data mode"; 2936 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 2937 descr = " ordered data mode"; 2938 else 2939 descr = " writeback data mode"; 2940 } else 2941 descr = "out journal"; 2942 2943 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); 2944 2945 lock_kernel(); 2946 return 0; 2947 2948 cantfind_ext4: 2949 if (!silent) 2950 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 2951 goto failed_mount; 2952 2953 failed_mount4: 2954 ext4_msg(sb, KERN_ERR, "mount failed"); 2955 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 2956 failed_mount_wq: 2957 ext4_release_system_zone(sb); 2958 if (sbi->s_journal) { 2959 jbd2_journal_destroy(sbi->s_journal); 2960 sbi->s_journal = NULL; 2961 } 2962 failed_mount3: 2963 if (sbi->s_flex_groups) { 2964 if (is_vmalloc_addr(sbi->s_flex_groups)) 2965 vfree(sbi->s_flex_groups); 2966 else 2967 kfree(sbi->s_flex_groups); 2968 } 2969 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2970 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2971 percpu_counter_destroy(&sbi->s_dirs_counter); 2972 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 2973 failed_mount2: 2974 for (i = 0; i < db_count; i++) 2975 brelse(sbi->s_group_desc[i]); 2976 kfree(sbi->s_group_desc); 2977 failed_mount: 2978 if (sbi->s_proc) { 2979 remove_proc_entry(sb->s_id, ext4_proc_root); 2980 } 2981 #ifdef CONFIG_QUOTA 2982 for (i = 0; i < MAXQUOTAS; i++) 2983 kfree(sbi->s_qf_names[i]); 2984 #endif 2985 ext4_blkdev_remove(sbi); 2986 brelse(bh); 2987 out_fail: 2988 sb->s_fs_info = NULL; 2989 kfree(sbi->s_blockgroup_lock); 2990 kfree(sbi); 2991 lock_kernel(); 2992 return ret; 2993 } 2994 2995 /* 2996 * Setup any per-fs journal parameters now. We'll do this both on 2997 * initial mount, once the journal has been initialised but before we've 2998 * done any recovery; and again on any subsequent remount. 2999 */ 3000 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 3001 { 3002 struct ext4_sb_info *sbi = EXT4_SB(sb); 3003 3004 journal->j_commit_interval = sbi->s_commit_interval; 3005 journal->j_min_batch_time = sbi->s_min_batch_time; 3006 journal->j_max_batch_time = sbi->s_max_batch_time; 3007 3008 spin_lock(&journal->j_state_lock); 3009 if (test_opt(sb, BARRIER)) 3010 journal->j_flags |= JBD2_BARRIER; 3011 else 3012 journal->j_flags &= ~JBD2_BARRIER; 3013 if (test_opt(sb, DATA_ERR_ABORT)) 3014 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3015 else 3016 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3017 spin_unlock(&journal->j_state_lock); 3018 } 3019 3020 static journal_t *ext4_get_journal(struct super_block *sb, 3021 unsigned int journal_inum) 3022 { 3023 struct inode *journal_inode; 3024 journal_t *journal; 3025 3026 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3027 3028 /* First, test for the existence of a valid inode on disk. Bad 3029 * things happen if we iget() an unused inode, as the subsequent 3030 * iput() will try to delete it. */ 3031 3032 journal_inode = ext4_iget(sb, journal_inum); 3033 if (IS_ERR(journal_inode)) { 3034 ext4_msg(sb, KERN_ERR, "no journal found"); 3035 return NULL; 3036 } 3037 if (!journal_inode->i_nlink) { 3038 make_bad_inode(journal_inode); 3039 iput(journal_inode); 3040 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 3041 return NULL; 3042 } 3043 3044 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 3045 journal_inode, journal_inode->i_size); 3046 if (!S_ISREG(journal_inode->i_mode)) { 3047 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 3048 iput(journal_inode); 3049 return NULL; 3050 } 3051 3052 journal = jbd2_journal_init_inode(journal_inode); 3053 if (!journal) { 3054 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3055 iput(journal_inode); 3056 return NULL; 3057 } 3058 journal->j_private = sb; 3059 ext4_init_journal_params(sb, journal); 3060 return journal; 3061 } 3062 3063 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3064 dev_t j_dev) 3065 { 3066 struct buffer_head *bh; 3067 journal_t *journal; 3068 ext4_fsblk_t start; 3069 ext4_fsblk_t len; 3070 int hblock, blocksize; 3071 ext4_fsblk_t sb_block; 3072 unsigned long offset; 3073 struct ext4_super_block *es; 3074 struct block_device *bdev; 3075 3076 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3077 3078 bdev = ext4_blkdev_get(j_dev, sb); 3079 if (bdev == NULL) 3080 return NULL; 3081 3082 if (bd_claim(bdev, sb)) { 3083 ext4_msg(sb, KERN_ERR, 3084 "failed to claim external journal device"); 3085 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3086 return NULL; 3087 } 3088 3089 blocksize = sb->s_blocksize; 3090 hblock = bdev_logical_block_size(bdev); 3091 if (blocksize < hblock) { 3092 ext4_msg(sb, KERN_ERR, 3093 "blocksize too small for journal device"); 3094 goto out_bdev; 3095 } 3096 3097 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3098 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3099 set_blocksize(bdev, blocksize); 3100 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3101 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3102 "external journal"); 3103 goto out_bdev; 3104 } 3105 3106 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3107 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3108 !(le32_to_cpu(es->s_feature_incompat) & 3109 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3110 ext4_msg(sb, KERN_ERR, "external journal has " 3111 "bad superblock"); 3112 brelse(bh); 3113 goto out_bdev; 3114 } 3115 3116 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3117 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3118 brelse(bh); 3119 goto out_bdev; 3120 } 3121 3122 len = ext4_blocks_count(es); 3123 start = sb_block + 1; 3124 brelse(bh); /* we're done with the superblock */ 3125 3126 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3127 start, len, blocksize); 3128 if (!journal) { 3129 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3130 goto out_bdev; 3131 } 3132 journal->j_private = sb; 3133 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3134 wait_on_buffer(journal->j_sb_buffer); 3135 if (!buffer_uptodate(journal->j_sb_buffer)) { 3136 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3137 goto out_journal; 3138 } 3139 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3140 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3141 "user (unsupported) - %d", 3142 be32_to_cpu(journal->j_superblock->s_nr_users)); 3143 goto out_journal; 3144 } 3145 EXT4_SB(sb)->journal_bdev = bdev; 3146 ext4_init_journal_params(sb, journal); 3147 return journal; 3148 3149 out_journal: 3150 jbd2_journal_destroy(journal); 3151 out_bdev: 3152 ext4_blkdev_put(bdev); 3153 return NULL; 3154 } 3155 3156 static int ext4_load_journal(struct super_block *sb, 3157 struct ext4_super_block *es, 3158 unsigned long journal_devnum) 3159 { 3160 journal_t *journal; 3161 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3162 dev_t journal_dev; 3163 int err = 0; 3164 int really_read_only; 3165 3166 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3167 3168 if (journal_devnum && 3169 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3170 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3171 "numbers have changed"); 3172 journal_dev = new_decode_dev(journal_devnum); 3173 } else 3174 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3175 3176 really_read_only = bdev_read_only(sb->s_bdev); 3177 3178 /* 3179 * Are we loading a blank journal or performing recovery after a 3180 * crash? For recovery, we need to check in advance whether we 3181 * can get read-write access to the device. 3182 */ 3183 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3184 if (sb->s_flags & MS_RDONLY) { 3185 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3186 "required on readonly filesystem"); 3187 if (really_read_only) { 3188 ext4_msg(sb, KERN_ERR, "write access " 3189 "unavailable, cannot proceed"); 3190 return -EROFS; 3191 } 3192 ext4_msg(sb, KERN_INFO, "write access will " 3193 "be enabled during recovery"); 3194 } 3195 } 3196 3197 if (journal_inum && journal_dev) { 3198 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3199 "and inode journals!"); 3200 return -EINVAL; 3201 } 3202 3203 if (journal_inum) { 3204 if (!(journal = ext4_get_journal(sb, journal_inum))) 3205 return -EINVAL; 3206 } else { 3207 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3208 return -EINVAL; 3209 } 3210 3211 if (!(journal->j_flags & JBD2_BARRIER)) 3212 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3213 3214 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3215 err = jbd2_journal_update_format(journal); 3216 if (err) { 3217 ext4_msg(sb, KERN_ERR, "error updating journal"); 3218 jbd2_journal_destroy(journal); 3219 return err; 3220 } 3221 } 3222 3223 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3224 err = jbd2_journal_wipe(journal, !really_read_only); 3225 if (!err) 3226 err = jbd2_journal_load(journal); 3227 3228 if (err) { 3229 ext4_msg(sb, KERN_ERR, "error loading journal"); 3230 jbd2_journal_destroy(journal); 3231 return err; 3232 } 3233 3234 EXT4_SB(sb)->s_journal = journal; 3235 ext4_clear_journal_err(sb, es); 3236 3237 if (journal_devnum && 3238 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3239 es->s_journal_dev = cpu_to_le32(journal_devnum); 3240 3241 /* Make sure we flush the recovery flag to disk. */ 3242 ext4_commit_super(sb, 1); 3243 } 3244 3245 return 0; 3246 } 3247 3248 static int ext4_commit_super(struct super_block *sb, int sync) 3249 { 3250 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3251 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3252 int error = 0; 3253 3254 if (!sbh) 3255 return error; 3256 if (buffer_write_io_error(sbh)) { 3257 /* 3258 * Oh, dear. A previous attempt to write the 3259 * superblock failed. This could happen because the 3260 * USB device was yanked out. Or it could happen to 3261 * be a transient write error and maybe the block will 3262 * be remapped. Nothing we can do but to retry the 3263 * write and hope for the best. 3264 */ 3265 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3266 "superblock detected"); 3267 clear_buffer_write_io_error(sbh); 3268 set_buffer_uptodate(sbh); 3269 } 3270 /* 3271 * If the file system is mounted read-only, don't update the 3272 * superblock write time. This avoids updating the superblock 3273 * write time when we are mounting the root file system 3274 * read/only but we need to replay the journal; at that point, 3275 * for people who are east of GMT and who make their clock 3276 * tick in localtime for Windows bug-for-bug compatibility, 3277 * the clock is set in the future, and this will cause e2fsck 3278 * to complain and force a full file system check. 3279 */ 3280 if (!(sb->s_flags & MS_RDONLY)) 3281 es->s_wtime = cpu_to_le32(get_seconds()); 3282 es->s_kbytes_written = 3283 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3284 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3285 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3286 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3287 &EXT4_SB(sb)->s_freeblocks_counter)); 3288 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3289 &EXT4_SB(sb)->s_freeinodes_counter)); 3290 sb->s_dirt = 0; 3291 BUFFER_TRACE(sbh, "marking dirty"); 3292 mark_buffer_dirty(sbh); 3293 if (sync) { 3294 error = sync_dirty_buffer(sbh); 3295 if (error) 3296 return error; 3297 3298 error = buffer_write_io_error(sbh); 3299 if (error) { 3300 ext4_msg(sb, KERN_ERR, "I/O error while writing " 3301 "superblock"); 3302 clear_buffer_write_io_error(sbh); 3303 set_buffer_uptodate(sbh); 3304 } 3305 } 3306 return error; 3307 } 3308 3309 /* 3310 * Have we just finished recovery? If so, and if we are mounting (or 3311 * remounting) the filesystem readonly, then we will end up with a 3312 * consistent fs on disk. Record that fact. 3313 */ 3314 static void ext4_mark_recovery_complete(struct super_block *sb, 3315 struct ext4_super_block *es) 3316 { 3317 journal_t *journal = EXT4_SB(sb)->s_journal; 3318 3319 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3320 BUG_ON(journal != NULL); 3321 return; 3322 } 3323 jbd2_journal_lock_updates(journal); 3324 if (jbd2_journal_flush(journal) < 0) 3325 goto out; 3326 3327 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3328 sb->s_flags & MS_RDONLY) { 3329 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3330 ext4_commit_super(sb, 1); 3331 } 3332 3333 out: 3334 jbd2_journal_unlock_updates(journal); 3335 } 3336 3337 /* 3338 * If we are mounting (or read-write remounting) a filesystem whose journal 3339 * has recorded an error from a previous lifetime, move that error to the 3340 * main filesystem now. 3341 */ 3342 static void ext4_clear_journal_err(struct super_block *sb, 3343 struct ext4_super_block *es) 3344 { 3345 journal_t *journal; 3346 int j_errno; 3347 const char *errstr; 3348 3349 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3350 3351 journal = EXT4_SB(sb)->s_journal; 3352 3353 /* 3354 * Now check for any error status which may have been recorded in the 3355 * journal by a prior ext4_error() or ext4_abort() 3356 */ 3357 3358 j_errno = jbd2_journal_errno(journal); 3359 if (j_errno) { 3360 char nbuf[16]; 3361 3362 errstr = ext4_decode_error(sb, j_errno, nbuf); 3363 ext4_warning(sb, __func__, "Filesystem error recorded " 3364 "from previous mount: %s", errstr); 3365 ext4_warning(sb, __func__, "Marking fs in need of " 3366 "filesystem check."); 3367 3368 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3369 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3370 ext4_commit_super(sb, 1); 3371 3372 jbd2_journal_clear_err(journal); 3373 } 3374 } 3375 3376 /* 3377 * Force the running and committing transactions to commit, 3378 * and wait on the commit. 3379 */ 3380 int ext4_force_commit(struct super_block *sb) 3381 { 3382 journal_t *journal; 3383 int ret = 0; 3384 3385 if (sb->s_flags & MS_RDONLY) 3386 return 0; 3387 3388 journal = EXT4_SB(sb)->s_journal; 3389 if (journal) 3390 ret = ext4_journal_force_commit(journal); 3391 3392 return ret; 3393 } 3394 3395 static void ext4_write_super(struct super_block *sb) 3396 { 3397 lock_super(sb); 3398 ext4_commit_super(sb, 1); 3399 unlock_super(sb); 3400 } 3401 3402 static int ext4_sync_fs(struct super_block *sb, int wait) 3403 { 3404 int ret = 0; 3405 tid_t target; 3406 struct ext4_sb_info *sbi = EXT4_SB(sb); 3407 3408 trace_ext4_sync_fs(sb, wait); 3409 flush_workqueue(sbi->dio_unwritten_wq); 3410 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 3411 if (wait) 3412 jbd2_log_wait_commit(sbi->s_journal, target); 3413 } 3414 return ret; 3415 } 3416 3417 /* 3418 * LVM calls this function before a (read-only) snapshot is created. This 3419 * gives us a chance to flush the journal completely and mark the fs clean. 3420 */ 3421 static int ext4_freeze(struct super_block *sb) 3422 { 3423 int error = 0; 3424 journal_t *journal; 3425 3426 if (sb->s_flags & MS_RDONLY) 3427 return 0; 3428 3429 journal = EXT4_SB(sb)->s_journal; 3430 3431 /* Now we set up the journal barrier. */ 3432 jbd2_journal_lock_updates(journal); 3433 3434 /* 3435 * Don't clear the needs_recovery flag if we failed to flush 3436 * the journal. 3437 */ 3438 error = jbd2_journal_flush(journal); 3439 if (error < 0) { 3440 out: 3441 jbd2_journal_unlock_updates(journal); 3442 return error; 3443 } 3444 3445 /* Journal blocked and flushed, clear needs_recovery flag. */ 3446 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3447 error = ext4_commit_super(sb, 1); 3448 if (error) 3449 goto out; 3450 return 0; 3451 } 3452 3453 /* 3454 * Called by LVM after the snapshot is done. We need to reset the RECOVER 3455 * flag here, even though the filesystem is not technically dirty yet. 3456 */ 3457 static int ext4_unfreeze(struct super_block *sb) 3458 { 3459 if (sb->s_flags & MS_RDONLY) 3460 return 0; 3461 3462 lock_super(sb); 3463 /* Reset the needs_recovery flag before the fs is unlocked. */ 3464 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3465 ext4_commit_super(sb, 1); 3466 unlock_super(sb); 3467 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3468 return 0; 3469 } 3470 3471 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3472 { 3473 struct ext4_super_block *es; 3474 struct ext4_sb_info *sbi = EXT4_SB(sb); 3475 ext4_fsblk_t n_blocks_count = 0; 3476 unsigned long old_sb_flags; 3477 struct ext4_mount_options old_opts; 3478 ext4_group_t g; 3479 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3480 int err; 3481 #ifdef CONFIG_QUOTA 3482 int i; 3483 #endif 3484 3485 lock_kernel(); 3486 3487 /* Store the original options */ 3488 lock_super(sb); 3489 old_sb_flags = sb->s_flags; 3490 old_opts.s_mount_opt = sbi->s_mount_opt; 3491 old_opts.s_resuid = sbi->s_resuid; 3492 old_opts.s_resgid = sbi->s_resgid; 3493 old_opts.s_commit_interval = sbi->s_commit_interval; 3494 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3495 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3496 #ifdef CONFIG_QUOTA 3497 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3498 for (i = 0; i < MAXQUOTAS; i++) 3499 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3500 #endif 3501 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3502 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3503 3504 /* 3505 * Allow the "check" option to be passed as a remount option. 3506 */ 3507 if (!parse_options(data, sb, NULL, &journal_ioprio, 3508 &n_blocks_count, 1)) { 3509 err = -EINVAL; 3510 goto restore_opts; 3511 } 3512 3513 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 3514 ext4_abort(sb, __func__, "Abort forced by user"); 3515 3516 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3517 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 3518 3519 es = sbi->s_es; 3520 3521 if (sbi->s_journal) { 3522 ext4_init_journal_params(sb, sbi->s_journal); 3523 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3524 } 3525 3526 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3527 n_blocks_count > ext4_blocks_count(es)) { 3528 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 3529 err = -EROFS; 3530 goto restore_opts; 3531 } 3532 3533 if (*flags & MS_RDONLY) { 3534 /* 3535 * First of all, the unconditional stuff we have to do 3536 * to disable replay of the journal when we next remount 3537 */ 3538 sb->s_flags |= MS_RDONLY; 3539 3540 /* 3541 * OK, test if we are remounting a valid rw partition 3542 * readonly, and if so set the rdonly flag and then 3543 * mark the partition as valid again. 3544 */ 3545 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3546 (sbi->s_mount_state & EXT4_VALID_FS)) 3547 es->s_state = cpu_to_le16(sbi->s_mount_state); 3548 3549 if (sbi->s_journal) 3550 ext4_mark_recovery_complete(sb, es); 3551 } else { 3552 /* Make sure we can mount this feature set readwrite */ 3553 if (!ext4_feature_set_ok(sb, 0)) { 3554 err = -EROFS; 3555 goto restore_opts; 3556 } 3557 /* 3558 * Make sure the group descriptor checksums 3559 * are sane. If they aren't, refuse to remount r/w. 3560 */ 3561 for (g = 0; g < sbi->s_groups_count; g++) { 3562 struct ext4_group_desc *gdp = 3563 ext4_get_group_desc(sb, g, NULL); 3564 3565 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3566 ext4_msg(sb, KERN_ERR, 3567 "ext4_remount: Checksum for group %u failed (%u!=%u)", 3568 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3569 le16_to_cpu(gdp->bg_checksum)); 3570 err = -EINVAL; 3571 goto restore_opts; 3572 } 3573 } 3574 3575 /* 3576 * If we have an unprocessed orphan list hanging 3577 * around from a previously readonly bdev mount, 3578 * require a full umount/remount for now. 3579 */ 3580 if (es->s_last_orphan) { 3581 ext4_msg(sb, KERN_WARNING, "Couldn't " 3582 "remount RDWR because of unprocessed " 3583 "orphan inode list. Please " 3584 "umount/remount instead"); 3585 err = -EINVAL; 3586 goto restore_opts; 3587 } 3588 3589 /* 3590 * Mounting a RDONLY partition read-write, so reread 3591 * and store the current valid flag. (It may have 3592 * been changed by e2fsck since we originally mounted 3593 * the partition.) 3594 */ 3595 if (sbi->s_journal) 3596 ext4_clear_journal_err(sb, es); 3597 sbi->s_mount_state = le16_to_cpu(es->s_state); 3598 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3599 goto restore_opts; 3600 if (!ext4_setup_super(sb, es, 0)) 3601 sb->s_flags &= ~MS_RDONLY; 3602 } 3603 } 3604 ext4_setup_system_zone(sb); 3605 if (sbi->s_journal == NULL) 3606 ext4_commit_super(sb, 1); 3607 3608 #ifdef CONFIG_QUOTA 3609 /* Release old quota file names */ 3610 for (i = 0; i < MAXQUOTAS; i++) 3611 if (old_opts.s_qf_names[i] && 3612 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3613 kfree(old_opts.s_qf_names[i]); 3614 #endif 3615 unlock_super(sb); 3616 unlock_kernel(); 3617 return 0; 3618 3619 restore_opts: 3620 sb->s_flags = old_sb_flags; 3621 sbi->s_mount_opt = old_opts.s_mount_opt; 3622 sbi->s_resuid = old_opts.s_resuid; 3623 sbi->s_resgid = old_opts.s_resgid; 3624 sbi->s_commit_interval = old_opts.s_commit_interval; 3625 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3626 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3627 #ifdef CONFIG_QUOTA 3628 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3629 for (i = 0; i < MAXQUOTAS; i++) { 3630 if (sbi->s_qf_names[i] && 3631 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3632 kfree(sbi->s_qf_names[i]); 3633 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3634 } 3635 #endif 3636 unlock_super(sb); 3637 unlock_kernel(); 3638 return err; 3639 } 3640 3641 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3642 { 3643 struct super_block *sb = dentry->d_sb; 3644 struct ext4_sb_info *sbi = EXT4_SB(sb); 3645 struct ext4_super_block *es = sbi->s_es; 3646 u64 fsid; 3647 3648 if (test_opt(sb, MINIX_DF)) { 3649 sbi->s_overhead_last = 0; 3650 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3651 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3652 ext4_fsblk_t overhead = 0; 3653 3654 /* 3655 * Compute the overhead (FS structures). This is constant 3656 * for a given filesystem unless the number of block groups 3657 * changes so we cache the previous value until it does. 3658 */ 3659 3660 /* 3661 * All of the blocks before first_data_block are 3662 * overhead 3663 */ 3664 overhead = le32_to_cpu(es->s_first_data_block); 3665 3666 /* 3667 * Add the overhead attributed to the superblock and 3668 * block group descriptors. If the sparse superblocks 3669 * feature is turned on, then not all groups have this. 3670 */ 3671 for (i = 0; i < ngroups; i++) { 3672 overhead += ext4_bg_has_super(sb, i) + 3673 ext4_bg_num_gdb(sb, i); 3674 cond_resched(); 3675 } 3676 3677 /* 3678 * Every block group has an inode bitmap, a block 3679 * bitmap, and an inode table. 3680 */ 3681 overhead += ngroups * (2 + sbi->s_itb_per_group); 3682 sbi->s_overhead_last = overhead; 3683 smp_wmb(); 3684 sbi->s_blocks_last = ext4_blocks_count(es); 3685 } 3686 3687 buf->f_type = EXT4_SUPER_MAGIC; 3688 buf->f_bsize = sb->s_blocksize; 3689 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3690 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3691 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3692 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3693 if (buf->f_bfree < ext4_r_blocks_count(es)) 3694 buf->f_bavail = 0; 3695 buf->f_files = le32_to_cpu(es->s_inodes_count); 3696 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3697 buf->f_namelen = EXT4_NAME_LEN; 3698 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3699 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3700 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3701 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3702 3703 return 0; 3704 } 3705 3706 /* Helper function for writing quotas on sync - we need to start transaction 3707 * before quota file is locked for write. Otherwise the are possible deadlocks: 3708 * Process 1 Process 2 3709 * ext4_create() quota_sync() 3710 * jbd2_journal_start() write_dquot() 3711 * vfs_dq_init() down(dqio_mutex) 3712 * down(dqio_mutex) jbd2_journal_start() 3713 * 3714 */ 3715 3716 #ifdef CONFIG_QUOTA 3717 3718 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3719 { 3720 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3721 } 3722 3723 static int ext4_write_dquot(struct dquot *dquot) 3724 { 3725 int ret, err; 3726 handle_t *handle; 3727 struct inode *inode; 3728 3729 inode = dquot_to_inode(dquot); 3730 handle = ext4_journal_start(inode, 3731 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3732 if (IS_ERR(handle)) 3733 return PTR_ERR(handle); 3734 ret = dquot_commit(dquot); 3735 err = ext4_journal_stop(handle); 3736 if (!ret) 3737 ret = err; 3738 return ret; 3739 } 3740 3741 static int ext4_acquire_dquot(struct dquot *dquot) 3742 { 3743 int ret, err; 3744 handle_t *handle; 3745 3746 handle = ext4_journal_start(dquot_to_inode(dquot), 3747 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3748 if (IS_ERR(handle)) 3749 return PTR_ERR(handle); 3750 ret = dquot_acquire(dquot); 3751 err = ext4_journal_stop(handle); 3752 if (!ret) 3753 ret = err; 3754 return ret; 3755 } 3756 3757 static int ext4_release_dquot(struct dquot *dquot) 3758 { 3759 int ret, err; 3760 handle_t *handle; 3761 3762 handle = ext4_journal_start(dquot_to_inode(dquot), 3763 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3764 if (IS_ERR(handle)) { 3765 /* Release dquot anyway to avoid endless cycle in dqput() */ 3766 dquot_release(dquot); 3767 return PTR_ERR(handle); 3768 } 3769 ret = dquot_release(dquot); 3770 err = ext4_journal_stop(handle); 3771 if (!ret) 3772 ret = err; 3773 return ret; 3774 } 3775 3776 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3777 { 3778 /* Are we journaling quotas? */ 3779 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3780 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3781 dquot_mark_dquot_dirty(dquot); 3782 return ext4_write_dquot(dquot); 3783 } else { 3784 return dquot_mark_dquot_dirty(dquot); 3785 } 3786 } 3787 3788 static int ext4_write_info(struct super_block *sb, int type) 3789 { 3790 int ret, err; 3791 handle_t *handle; 3792 3793 /* Data block + inode block */ 3794 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3795 if (IS_ERR(handle)) 3796 return PTR_ERR(handle); 3797 ret = dquot_commit_info(sb, type); 3798 err = ext4_journal_stop(handle); 3799 if (!ret) 3800 ret = err; 3801 return ret; 3802 } 3803 3804 /* 3805 * Turn on quotas during mount time - we need to find 3806 * the quota file and such... 3807 */ 3808 static int ext4_quota_on_mount(struct super_block *sb, int type) 3809 { 3810 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3811 EXT4_SB(sb)->s_jquota_fmt, type); 3812 } 3813 3814 /* 3815 * Standard function to be called on quota_on 3816 */ 3817 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3818 char *name, int remount) 3819 { 3820 int err; 3821 struct path path; 3822 3823 if (!test_opt(sb, QUOTA)) 3824 return -EINVAL; 3825 /* When remounting, no checks are needed and in fact, name is NULL */ 3826 if (remount) 3827 return vfs_quota_on(sb, type, format_id, name, remount); 3828 3829 err = kern_path(name, LOOKUP_FOLLOW, &path); 3830 if (err) 3831 return err; 3832 3833 /* Quotafile not on the same filesystem? */ 3834 if (path.mnt->mnt_sb != sb) { 3835 path_put(&path); 3836 return -EXDEV; 3837 } 3838 /* Journaling quota? */ 3839 if (EXT4_SB(sb)->s_qf_names[type]) { 3840 /* Quotafile not in fs root? */ 3841 if (path.dentry->d_parent != sb->s_root) 3842 ext4_msg(sb, KERN_WARNING, 3843 "Quota file not on filesystem root. " 3844 "Journaled quota will not work"); 3845 } 3846 3847 /* 3848 * When we journal data on quota file, we have to flush journal to see 3849 * all updates to the file when we bypass pagecache... 3850 */ 3851 if (EXT4_SB(sb)->s_journal && 3852 ext4_should_journal_data(path.dentry->d_inode)) { 3853 /* 3854 * We don't need to lock updates but journal_flush() could 3855 * otherwise be livelocked... 3856 */ 3857 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3858 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3859 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3860 if (err) { 3861 path_put(&path); 3862 return err; 3863 } 3864 } 3865 3866 err = vfs_quota_on_path(sb, type, format_id, &path); 3867 path_put(&path); 3868 return err; 3869 } 3870 3871 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3872 * acquiring the locks... As quota files are never truncated and quota code 3873 * itself serializes the operations (and noone else should touch the files) 3874 * we don't have to be afraid of races */ 3875 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3876 size_t len, loff_t off) 3877 { 3878 struct inode *inode = sb_dqopt(sb)->files[type]; 3879 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3880 int err = 0; 3881 int offset = off & (sb->s_blocksize - 1); 3882 int tocopy; 3883 size_t toread; 3884 struct buffer_head *bh; 3885 loff_t i_size = i_size_read(inode); 3886 3887 if (off > i_size) 3888 return 0; 3889 if (off+len > i_size) 3890 len = i_size-off; 3891 toread = len; 3892 while (toread > 0) { 3893 tocopy = sb->s_blocksize - offset < toread ? 3894 sb->s_blocksize - offset : toread; 3895 bh = ext4_bread(NULL, inode, blk, 0, &err); 3896 if (err) 3897 return err; 3898 if (!bh) /* A hole? */ 3899 memset(data, 0, tocopy); 3900 else 3901 memcpy(data, bh->b_data+offset, tocopy); 3902 brelse(bh); 3903 offset = 0; 3904 toread -= tocopy; 3905 data += tocopy; 3906 blk++; 3907 } 3908 return len; 3909 } 3910 3911 /* Write to quotafile (we know the transaction is already started and has 3912 * enough credits) */ 3913 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3914 const char *data, size_t len, loff_t off) 3915 { 3916 struct inode *inode = sb_dqopt(sb)->files[type]; 3917 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3918 int err = 0; 3919 int offset = off & (sb->s_blocksize - 1); 3920 int tocopy; 3921 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 3922 size_t towrite = len; 3923 struct buffer_head *bh; 3924 handle_t *handle = journal_current_handle(); 3925 3926 if (EXT4_SB(sb)->s_journal && !handle) { 3927 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 3928 " cancelled because transaction is not started", 3929 (unsigned long long)off, (unsigned long long)len); 3930 return -EIO; 3931 } 3932 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3933 while (towrite > 0) { 3934 tocopy = sb->s_blocksize - offset < towrite ? 3935 sb->s_blocksize - offset : towrite; 3936 bh = ext4_bread(handle, inode, blk, 1, &err); 3937 if (!bh) 3938 goto out; 3939 if (journal_quota) { 3940 err = ext4_journal_get_write_access(handle, bh); 3941 if (err) { 3942 brelse(bh); 3943 goto out; 3944 } 3945 } 3946 lock_buffer(bh); 3947 memcpy(bh->b_data+offset, data, tocopy); 3948 flush_dcache_page(bh->b_page); 3949 unlock_buffer(bh); 3950 if (journal_quota) 3951 err = ext4_handle_dirty_metadata(handle, NULL, bh); 3952 else { 3953 /* Always do at least ordered writes for quotas */ 3954 err = ext4_jbd2_file_inode(handle, inode); 3955 mark_buffer_dirty(bh); 3956 } 3957 brelse(bh); 3958 if (err) 3959 goto out; 3960 offset = 0; 3961 towrite -= tocopy; 3962 data += tocopy; 3963 blk++; 3964 } 3965 out: 3966 if (len == towrite) { 3967 mutex_unlock(&inode->i_mutex); 3968 return err; 3969 } 3970 if (inode->i_size < off+len-towrite) { 3971 i_size_write(inode, off+len-towrite); 3972 EXT4_I(inode)->i_disksize = inode->i_size; 3973 } 3974 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3975 ext4_mark_inode_dirty(handle, inode); 3976 mutex_unlock(&inode->i_mutex); 3977 return len - towrite; 3978 } 3979 3980 #endif 3981 3982 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 3983 const char *dev_name, void *data, struct vfsmount *mnt) 3984 { 3985 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3986 } 3987 3988 #if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 3989 static struct file_system_type ext2_fs_type = { 3990 .owner = THIS_MODULE, 3991 .name = "ext2", 3992 .get_sb = ext4_get_sb, 3993 .kill_sb = kill_block_super, 3994 .fs_flags = FS_REQUIRES_DEV, 3995 }; 3996 3997 static inline void register_as_ext2(void) 3998 { 3999 int err = register_filesystem(&ext2_fs_type); 4000 if (err) 4001 printk(KERN_WARNING 4002 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 4003 } 4004 4005 static inline void unregister_as_ext2(void) 4006 { 4007 unregister_filesystem(&ext2_fs_type); 4008 } 4009 MODULE_ALIAS("ext2"); 4010 #else 4011 static inline void register_as_ext2(void) { } 4012 static inline void unregister_as_ext2(void) { } 4013 #endif 4014 4015 #if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4016 static struct file_system_type ext3_fs_type = { 4017 .owner = THIS_MODULE, 4018 .name = "ext3", 4019 .get_sb = ext4_get_sb, 4020 .kill_sb = kill_block_super, 4021 .fs_flags = FS_REQUIRES_DEV, 4022 }; 4023 4024 static inline void register_as_ext3(void) 4025 { 4026 int err = register_filesystem(&ext3_fs_type); 4027 if (err) 4028 printk(KERN_WARNING 4029 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4030 } 4031 4032 static inline void unregister_as_ext3(void) 4033 { 4034 unregister_filesystem(&ext3_fs_type); 4035 } 4036 MODULE_ALIAS("ext3"); 4037 #else 4038 static inline void register_as_ext3(void) { } 4039 static inline void unregister_as_ext3(void) { } 4040 #endif 4041 4042 static struct file_system_type ext4_fs_type = { 4043 .owner = THIS_MODULE, 4044 .name = "ext4", 4045 .get_sb = ext4_get_sb, 4046 .kill_sb = kill_block_super, 4047 .fs_flags = FS_REQUIRES_DEV, 4048 }; 4049 4050 static int __init init_ext4_fs(void) 4051 { 4052 int err; 4053 4054 err = init_ext4_system_zone(); 4055 if (err) 4056 return err; 4057 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4058 if (!ext4_kset) 4059 goto out4; 4060 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4061 err = init_ext4_mballoc(); 4062 if (err) 4063 goto out3; 4064 4065 err = init_ext4_xattr(); 4066 if (err) 4067 goto out2; 4068 err = init_inodecache(); 4069 if (err) 4070 goto out1; 4071 register_as_ext2(); 4072 register_as_ext3(); 4073 err = register_filesystem(&ext4_fs_type); 4074 if (err) 4075 goto out; 4076 return 0; 4077 out: 4078 unregister_as_ext2(); 4079 unregister_as_ext3(); 4080 destroy_inodecache(); 4081 out1: 4082 exit_ext4_xattr(); 4083 out2: 4084 exit_ext4_mballoc(); 4085 out3: 4086 remove_proc_entry("fs/ext4", NULL); 4087 kset_unregister(ext4_kset); 4088 out4: 4089 exit_ext4_system_zone(); 4090 return err; 4091 } 4092 4093 static void __exit exit_ext4_fs(void) 4094 { 4095 unregister_as_ext2(); 4096 unregister_as_ext3(); 4097 unregister_filesystem(&ext4_fs_type); 4098 destroy_inodecache(); 4099 exit_ext4_xattr(); 4100 exit_ext4_mballoc(); 4101 remove_proc_entry("fs/ext4", NULL); 4102 kset_unregister(ext4_kset); 4103 exit_ext4_system_zone(); 4104 } 4105 4106 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4107 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4108 MODULE_LICENSE("GPL"); 4109 module_init(init_ext4_fs) 4110 module_exit(exit_ext4_fs) 4111