1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/jbd2.h> 24 #include <linux/slab.h> 25 #include <linux/init.h> 26 #include <linux/blkdev.h> 27 #include <linux/parser.h> 28 #include <linux/smp_lock.h> 29 #include <linux/buffer_head.h> 30 #include <linux/exportfs.h> 31 #include <linux/vfs.h> 32 #include <linux/random.h> 33 #include <linux/mount.h> 34 #include <linux/namei.h> 35 #include <linux/quotaops.h> 36 #include <linux/seq_file.h> 37 #include <linux/proc_fs.h> 38 #include <linux/marker.h> 39 #include <linux/log2.h> 40 #include <linux/crc16.h> 41 #include <asm/uaccess.h> 42 43 #include "ext4.h" 44 #include "ext4_jbd2.h" 45 #include "xattr.h" 46 #include "acl.h" 47 #include "namei.h" 48 #include "group.h" 49 50 struct proc_dir_entry *ext4_proc_root; 51 52 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 53 unsigned long journal_devnum); 54 static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 55 unsigned int); 56 static void ext4_commit_super(struct super_block *sb, 57 struct ext4_super_block *es, int sync); 58 static void ext4_mark_recovery_complete(struct super_block *sb, 59 struct ext4_super_block *es); 60 static void ext4_clear_journal_err(struct super_block *sb, 61 struct ext4_super_block *es); 62 static int ext4_sync_fs(struct super_block *sb, int wait); 63 static const char *ext4_decode_error(struct super_block *sb, int errno, 64 char nbuf[16]); 65 static int ext4_remount(struct super_block *sb, int *flags, char *data); 66 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 67 static void ext4_unlockfs(struct super_block *sb); 68 static void ext4_write_super(struct super_block *sb); 69 static void ext4_write_super_lockfs(struct super_block *sb); 70 71 72 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 73 struct ext4_group_desc *bg) 74 { 75 return le32_to_cpu(bg->bg_block_bitmap_lo) | 76 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 77 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 78 } 79 80 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 81 struct ext4_group_desc *bg) 82 { 83 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 84 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 85 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 86 } 87 88 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 89 struct ext4_group_desc *bg) 90 { 91 return le32_to_cpu(bg->bg_inode_table_lo) | 92 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 93 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 94 } 95 96 void ext4_block_bitmap_set(struct super_block *sb, 97 struct ext4_group_desc *bg, ext4_fsblk_t blk) 98 { 99 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 100 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 101 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 102 } 103 104 void ext4_inode_bitmap_set(struct super_block *sb, 105 struct ext4_group_desc *bg, ext4_fsblk_t blk) 106 { 107 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 108 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 109 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 110 } 111 112 void ext4_inode_table_set(struct super_block *sb, 113 struct ext4_group_desc *bg, ext4_fsblk_t blk) 114 { 115 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 116 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 117 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 118 } 119 120 /* 121 * Wrappers for jbd2_journal_start/end. 122 * 123 * The only special thing we need to do here is to make sure that all 124 * journal_end calls result in the superblock being marked dirty, so 125 * that sync() will call the filesystem's write_super callback if 126 * appropriate. 127 */ 128 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 129 { 130 journal_t *journal; 131 132 if (sb->s_flags & MS_RDONLY) 133 return ERR_PTR(-EROFS); 134 135 /* Special case here: if the journal has aborted behind our 136 * backs (eg. EIO in the commit thread), then we still need to 137 * take the FS itself readonly cleanly. */ 138 journal = EXT4_SB(sb)->s_journal; 139 if (is_journal_aborted(journal)) { 140 ext4_abort(sb, __func__, 141 "Detected aborted journal"); 142 return ERR_PTR(-EROFS); 143 } 144 145 return jbd2_journal_start(journal, nblocks); 146 } 147 148 /* 149 * The only special thing we need to do here is to make sure that all 150 * jbd2_journal_stop calls result in the superblock being marked dirty, so 151 * that sync() will call the filesystem's write_super callback if 152 * appropriate. 153 */ 154 int __ext4_journal_stop(const char *where, handle_t *handle) 155 { 156 struct super_block *sb; 157 int err; 158 int rc; 159 160 sb = handle->h_transaction->t_journal->j_private; 161 err = handle->h_err; 162 rc = jbd2_journal_stop(handle); 163 164 if (!err) 165 err = rc; 166 if (err) 167 __ext4_std_error(sb, where, err); 168 return err; 169 } 170 171 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 172 struct buffer_head *bh, handle_t *handle, int err) 173 { 174 char nbuf[16]; 175 const char *errstr = ext4_decode_error(NULL, err, nbuf); 176 177 if (bh) 178 BUFFER_TRACE(bh, "abort"); 179 180 if (!handle->h_err) 181 handle->h_err = err; 182 183 if (is_handle_aborted(handle)) 184 return; 185 186 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 187 caller, errstr, err_fn); 188 189 jbd2_journal_abort_handle(handle); 190 } 191 192 /* Deal with the reporting of failure conditions on a filesystem such as 193 * inconsistencies detected or read IO failures. 194 * 195 * On ext2, we can store the error state of the filesystem in the 196 * superblock. That is not possible on ext4, because we may have other 197 * write ordering constraints on the superblock which prevent us from 198 * writing it out straight away; and given that the journal is about to 199 * be aborted, we can't rely on the current, or future, transactions to 200 * write out the superblock safely. 201 * 202 * We'll just use the jbd2_journal_abort() error code to record an error in 203 * the journal instead. On recovery, the journal will compain about 204 * that error until we've noted it down and cleared it. 205 */ 206 207 static void ext4_handle_error(struct super_block *sb) 208 { 209 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 210 211 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 212 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 213 214 if (sb->s_flags & MS_RDONLY) 215 return; 216 217 if (!test_opt(sb, ERRORS_CONT)) { 218 journal_t *journal = EXT4_SB(sb)->s_journal; 219 220 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 221 if (journal) 222 jbd2_journal_abort(journal, -EIO); 223 } 224 if (test_opt(sb, ERRORS_RO)) { 225 printk(KERN_CRIT "Remounting filesystem read-only\n"); 226 sb->s_flags |= MS_RDONLY; 227 } 228 ext4_commit_super(sb, es, 1); 229 if (test_opt(sb, ERRORS_PANIC)) 230 panic("EXT4-fs (device %s): panic forced after error\n", 231 sb->s_id); 232 } 233 234 void ext4_error(struct super_block *sb, const char *function, 235 const char *fmt, ...) 236 { 237 va_list args; 238 239 va_start(args, fmt); 240 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 241 vprintk(fmt, args); 242 printk("\n"); 243 va_end(args); 244 245 ext4_handle_error(sb); 246 } 247 248 static const char *ext4_decode_error(struct super_block *sb, int errno, 249 char nbuf[16]) 250 { 251 char *errstr = NULL; 252 253 switch (errno) { 254 case -EIO: 255 errstr = "IO failure"; 256 break; 257 case -ENOMEM: 258 errstr = "Out of memory"; 259 break; 260 case -EROFS: 261 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 262 errstr = "Journal has aborted"; 263 else 264 errstr = "Readonly filesystem"; 265 break; 266 default: 267 /* If the caller passed in an extra buffer for unknown 268 * errors, textualise them now. Else we just return 269 * NULL. */ 270 if (nbuf) { 271 /* Check for truncated error codes... */ 272 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 273 errstr = nbuf; 274 } 275 break; 276 } 277 278 return errstr; 279 } 280 281 /* __ext4_std_error decodes expected errors from journaling functions 282 * automatically and invokes the appropriate error response. */ 283 284 void __ext4_std_error(struct super_block *sb, const char *function, int errno) 285 { 286 char nbuf[16]; 287 const char *errstr; 288 289 /* Special case: if the error is EROFS, and we're not already 290 * inside a transaction, then there's really no point in logging 291 * an error. */ 292 if (errno == -EROFS && journal_current_handle() == NULL && 293 (sb->s_flags & MS_RDONLY)) 294 return; 295 296 errstr = ext4_decode_error(sb, errno, nbuf); 297 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 298 sb->s_id, function, errstr); 299 300 ext4_handle_error(sb); 301 } 302 303 /* 304 * ext4_abort is a much stronger failure handler than ext4_error. The 305 * abort function may be used to deal with unrecoverable failures such 306 * as journal IO errors or ENOMEM at a critical moment in log management. 307 * 308 * We unconditionally force the filesystem into an ABORT|READONLY state, 309 * unless the error response on the fs has been set to panic in which 310 * case we take the easy way out and panic immediately. 311 */ 312 313 void ext4_abort(struct super_block *sb, const char *function, 314 const char *fmt, ...) 315 { 316 va_list args; 317 318 printk(KERN_CRIT "ext4_abort called.\n"); 319 320 va_start(args, fmt); 321 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 322 vprintk(fmt, args); 323 printk("\n"); 324 va_end(args); 325 326 if (test_opt(sb, ERRORS_PANIC)) 327 panic("EXT4-fs panic from previous error\n"); 328 329 if (sb->s_flags & MS_RDONLY) 330 return; 331 332 printk(KERN_CRIT "Remounting filesystem read-only\n"); 333 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 334 sb->s_flags |= MS_RDONLY; 335 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 336 if (EXT4_SB(sb)->s_journal) 337 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 338 } 339 340 void ext4_warning(struct super_block *sb, const char *function, 341 const char *fmt, ...) 342 { 343 va_list args; 344 345 va_start(args, fmt); 346 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 347 sb->s_id, function); 348 vprintk(fmt, args); 349 printk("\n"); 350 va_end(args); 351 } 352 353 void ext4_update_dynamic_rev(struct super_block *sb) 354 { 355 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 356 357 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 358 return; 359 360 ext4_warning(sb, __func__, 361 "updating to rev %d because of new feature flag, " 362 "running e2fsck is recommended", 363 EXT4_DYNAMIC_REV); 364 365 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 366 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 367 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 368 /* leave es->s_feature_*compat flags alone */ 369 /* es->s_uuid will be set by e2fsck if empty */ 370 371 /* 372 * The rest of the superblock fields should be zero, and if not it 373 * means they are likely already in use, so leave them alone. We 374 * can leave it up to e2fsck to clean up any inconsistencies there. 375 */ 376 } 377 378 /* 379 * Open the external journal device 380 */ 381 static struct block_device *ext4_blkdev_get(dev_t dev) 382 { 383 struct block_device *bdev; 384 char b[BDEVNAME_SIZE]; 385 386 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 387 if (IS_ERR(bdev)) 388 goto fail; 389 return bdev; 390 391 fail: 392 printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", 393 __bdevname(dev, b), PTR_ERR(bdev)); 394 return NULL; 395 } 396 397 /* 398 * Release the journal device 399 */ 400 static int ext4_blkdev_put(struct block_device *bdev) 401 { 402 bd_release(bdev); 403 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 404 } 405 406 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 407 { 408 struct block_device *bdev; 409 int ret = -ENODEV; 410 411 bdev = sbi->journal_bdev; 412 if (bdev) { 413 ret = ext4_blkdev_put(bdev); 414 sbi->journal_bdev = NULL; 415 } 416 return ret; 417 } 418 419 static inline struct inode *orphan_list_entry(struct list_head *l) 420 { 421 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 422 } 423 424 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 425 { 426 struct list_head *l; 427 428 printk(KERN_ERR "sb orphan head is %d\n", 429 le32_to_cpu(sbi->s_es->s_last_orphan)); 430 431 printk(KERN_ERR "sb_info orphan list:\n"); 432 list_for_each(l, &sbi->s_orphan) { 433 struct inode *inode = orphan_list_entry(l); 434 printk(KERN_ERR " " 435 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 436 inode->i_sb->s_id, inode->i_ino, inode, 437 inode->i_mode, inode->i_nlink, 438 NEXT_ORPHAN(inode)); 439 } 440 } 441 442 static void ext4_put_super(struct super_block *sb) 443 { 444 struct ext4_sb_info *sbi = EXT4_SB(sb); 445 struct ext4_super_block *es = sbi->s_es; 446 int i, err; 447 448 ext4_mb_release(sb); 449 ext4_ext_release(sb); 450 ext4_xattr_put_super(sb); 451 err = jbd2_journal_destroy(sbi->s_journal); 452 sbi->s_journal = NULL; 453 if (err < 0) 454 ext4_abort(sb, __func__, "Couldn't clean up the journal"); 455 456 if (!(sb->s_flags & MS_RDONLY)) { 457 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 458 es->s_state = cpu_to_le16(sbi->s_mount_state); 459 ext4_commit_super(sb, es, 1); 460 } 461 if (sbi->s_proc) { 462 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 463 remove_proc_entry(sb->s_id, ext4_proc_root); 464 } 465 466 for (i = 0; i < sbi->s_gdb_count; i++) 467 brelse(sbi->s_group_desc[i]); 468 kfree(sbi->s_group_desc); 469 kfree(sbi->s_flex_groups); 470 percpu_counter_destroy(&sbi->s_freeblocks_counter); 471 percpu_counter_destroy(&sbi->s_freeinodes_counter); 472 percpu_counter_destroy(&sbi->s_dirs_counter); 473 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 474 brelse(sbi->s_sbh); 475 #ifdef CONFIG_QUOTA 476 for (i = 0; i < MAXQUOTAS; i++) 477 kfree(sbi->s_qf_names[i]); 478 #endif 479 480 /* Debugging code just in case the in-memory inode orphan list 481 * isn't empty. The on-disk one can be non-empty if we've 482 * detected an error and taken the fs readonly, but the 483 * in-memory list had better be clean by this point. */ 484 if (!list_empty(&sbi->s_orphan)) 485 dump_orphan_list(sb, sbi); 486 J_ASSERT(list_empty(&sbi->s_orphan)); 487 488 invalidate_bdev(sb->s_bdev); 489 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 490 /* 491 * Invalidate the journal device's buffers. We don't want them 492 * floating about in memory - the physical journal device may 493 * hotswapped, and it breaks the `ro-after' testing code. 494 */ 495 sync_blockdev(sbi->journal_bdev); 496 invalidate_bdev(sbi->journal_bdev); 497 ext4_blkdev_remove(sbi); 498 } 499 sb->s_fs_info = NULL; 500 kfree(sbi); 501 return; 502 } 503 504 static struct kmem_cache *ext4_inode_cachep; 505 506 /* 507 * Called inside transaction, so use GFP_NOFS 508 */ 509 static struct inode *ext4_alloc_inode(struct super_block *sb) 510 { 511 struct ext4_inode_info *ei; 512 513 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 514 if (!ei) 515 return NULL; 516 #ifdef CONFIG_EXT4_FS_POSIX_ACL 517 ei->i_acl = EXT4_ACL_NOT_CACHED; 518 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 519 #endif 520 ei->vfs_inode.i_version = 1; 521 ei->vfs_inode.i_data.writeback_index = 0; 522 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 523 INIT_LIST_HEAD(&ei->i_prealloc_list); 524 spin_lock_init(&ei->i_prealloc_lock); 525 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 526 ei->i_reserved_data_blocks = 0; 527 ei->i_reserved_meta_blocks = 0; 528 ei->i_allocated_meta_blocks = 0; 529 ei->i_delalloc_reserved_flag = 0; 530 spin_lock_init(&(ei->i_block_reservation_lock)); 531 return &ei->vfs_inode; 532 } 533 534 static void ext4_destroy_inode(struct inode *inode) 535 { 536 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 537 printk("EXT4 Inode %p: orphan list check failed!\n", 538 EXT4_I(inode)); 539 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 540 EXT4_I(inode), sizeof(struct ext4_inode_info), 541 true); 542 dump_stack(); 543 } 544 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 545 } 546 547 static void init_once(void *foo) 548 { 549 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 550 551 INIT_LIST_HEAD(&ei->i_orphan); 552 #ifdef CONFIG_EXT4_FS_XATTR 553 init_rwsem(&ei->xattr_sem); 554 #endif 555 init_rwsem(&ei->i_data_sem); 556 inode_init_once(&ei->vfs_inode); 557 } 558 559 static int init_inodecache(void) 560 { 561 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 562 sizeof(struct ext4_inode_info), 563 0, (SLAB_RECLAIM_ACCOUNT| 564 SLAB_MEM_SPREAD), 565 init_once); 566 if (ext4_inode_cachep == NULL) 567 return -ENOMEM; 568 return 0; 569 } 570 571 static void destroy_inodecache(void) 572 { 573 kmem_cache_destroy(ext4_inode_cachep); 574 } 575 576 static void ext4_clear_inode(struct inode *inode) 577 { 578 #ifdef CONFIG_EXT4_FS_POSIX_ACL 579 if (EXT4_I(inode)->i_acl && 580 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 581 posix_acl_release(EXT4_I(inode)->i_acl); 582 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 583 } 584 if (EXT4_I(inode)->i_default_acl && 585 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 586 posix_acl_release(EXT4_I(inode)->i_default_acl); 587 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 588 } 589 #endif 590 ext4_discard_preallocations(inode); 591 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 592 &EXT4_I(inode)->jinode); 593 } 594 595 static inline void ext4_show_quota_options(struct seq_file *seq, 596 struct super_block *sb) 597 { 598 #if defined(CONFIG_QUOTA) 599 struct ext4_sb_info *sbi = EXT4_SB(sb); 600 601 if (sbi->s_jquota_fmt) 602 seq_printf(seq, ",jqfmt=%s", 603 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 604 605 if (sbi->s_qf_names[USRQUOTA]) 606 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 607 608 if (sbi->s_qf_names[GRPQUOTA]) 609 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 610 611 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 612 seq_puts(seq, ",usrquota"); 613 614 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 615 seq_puts(seq, ",grpquota"); 616 #endif 617 } 618 619 /* 620 * Show an option if 621 * - it's set to a non-default value OR 622 * - if the per-sb default is different from the global default 623 */ 624 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 625 { 626 int def_errors; 627 unsigned long def_mount_opts; 628 struct super_block *sb = vfs->mnt_sb; 629 struct ext4_sb_info *sbi = EXT4_SB(sb); 630 struct ext4_super_block *es = sbi->s_es; 631 632 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 633 def_errors = le16_to_cpu(es->s_errors); 634 635 if (sbi->s_sb_block != 1) 636 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 637 if (test_opt(sb, MINIX_DF)) 638 seq_puts(seq, ",minixdf"); 639 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 640 seq_puts(seq, ",grpid"); 641 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 642 seq_puts(seq, ",nogrpid"); 643 if (sbi->s_resuid != EXT4_DEF_RESUID || 644 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 645 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 646 } 647 if (sbi->s_resgid != EXT4_DEF_RESGID || 648 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 649 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 650 } 651 if (test_opt(sb, ERRORS_RO)) { 652 if (def_errors == EXT4_ERRORS_PANIC || 653 def_errors == EXT4_ERRORS_CONTINUE) { 654 seq_puts(seq, ",errors=remount-ro"); 655 } 656 } 657 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 658 seq_puts(seq, ",errors=continue"); 659 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 660 seq_puts(seq, ",errors=panic"); 661 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 662 seq_puts(seq, ",nouid32"); 663 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 664 seq_puts(seq, ",debug"); 665 if (test_opt(sb, OLDALLOC)) 666 seq_puts(seq, ",oldalloc"); 667 #ifdef CONFIG_EXT4_FS_XATTR 668 if (test_opt(sb, XATTR_USER) && 669 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 670 seq_puts(seq, ",user_xattr"); 671 if (!test_opt(sb, XATTR_USER) && 672 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 673 seq_puts(seq, ",nouser_xattr"); 674 } 675 #endif 676 #ifdef CONFIG_EXT4_FS_POSIX_ACL 677 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 678 seq_puts(seq, ",acl"); 679 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 680 seq_puts(seq, ",noacl"); 681 #endif 682 if (!test_opt(sb, RESERVATION)) 683 seq_puts(seq, ",noreservation"); 684 if (sbi->s_commit_interval) { 685 seq_printf(seq, ",commit=%u", 686 (unsigned) (sbi->s_commit_interval / HZ)); 687 } 688 /* 689 * We're changing the default of barrier mount option, so 690 * let's always display its mount state so it's clear what its 691 * status is. 692 */ 693 seq_puts(seq, ",barrier="); 694 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 695 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 696 seq_puts(seq, ",journal_async_commit"); 697 if (test_opt(sb, NOBH)) 698 seq_puts(seq, ",nobh"); 699 if (!test_opt(sb, EXTENTS)) 700 seq_puts(seq, ",noextents"); 701 if (test_opt(sb, I_VERSION)) 702 seq_puts(seq, ",i_version"); 703 if (!test_opt(sb, DELALLOC)) 704 seq_puts(seq, ",nodelalloc"); 705 706 707 if (sbi->s_stripe) 708 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 709 /* 710 * journal mode get enabled in different ways 711 * So just print the value even if we didn't specify it 712 */ 713 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 714 seq_puts(seq, ",data=journal"); 715 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 716 seq_puts(seq, ",data=ordered"); 717 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 718 seq_puts(seq, ",data=writeback"); 719 720 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 721 seq_printf(seq, ",inode_readahead_blks=%u", 722 sbi->s_inode_readahead_blks); 723 724 if (test_opt(sb, DATA_ERR_ABORT)) 725 seq_puts(seq, ",data_err=abort"); 726 727 ext4_show_quota_options(seq, sb); 728 return 0; 729 } 730 731 732 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 733 u64 ino, u32 generation) 734 { 735 struct inode *inode; 736 737 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 738 return ERR_PTR(-ESTALE); 739 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 740 return ERR_PTR(-ESTALE); 741 742 /* iget isn't really right if the inode is currently unallocated!! 743 * 744 * ext4_read_inode will return a bad_inode if the inode had been 745 * deleted, so we should be safe. 746 * 747 * Currently we don't know the generation for parent directory, so 748 * a generation of 0 means "accept any" 749 */ 750 inode = ext4_iget(sb, ino); 751 if (IS_ERR(inode)) 752 return ERR_CAST(inode); 753 if (generation && inode->i_generation != generation) { 754 iput(inode); 755 return ERR_PTR(-ESTALE); 756 } 757 758 return inode; 759 } 760 761 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 762 int fh_len, int fh_type) 763 { 764 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 765 ext4_nfs_get_inode); 766 } 767 768 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 769 int fh_len, int fh_type) 770 { 771 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 772 ext4_nfs_get_inode); 773 } 774 775 #ifdef CONFIG_QUOTA 776 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 777 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 778 779 static int ext4_dquot_initialize(struct inode *inode, int type); 780 static int ext4_dquot_drop(struct inode *inode); 781 static int ext4_write_dquot(struct dquot *dquot); 782 static int ext4_acquire_dquot(struct dquot *dquot); 783 static int ext4_release_dquot(struct dquot *dquot); 784 static int ext4_mark_dquot_dirty(struct dquot *dquot); 785 static int ext4_write_info(struct super_block *sb, int type); 786 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 787 char *path, int remount); 788 static int ext4_quota_on_mount(struct super_block *sb, int type); 789 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 790 size_t len, loff_t off); 791 static ssize_t ext4_quota_write(struct super_block *sb, int type, 792 const char *data, size_t len, loff_t off); 793 794 static struct dquot_operations ext4_quota_operations = { 795 .initialize = ext4_dquot_initialize, 796 .drop = ext4_dquot_drop, 797 .alloc_space = dquot_alloc_space, 798 .alloc_inode = dquot_alloc_inode, 799 .free_space = dquot_free_space, 800 .free_inode = dquot_free_inode, 801 .transfer = dquot_transfer, 802 .write_dquot = ext4_write_dquot, 803 .acquire_dquot = ext4_acquire_dquot, 804 .release_dquot = ext4_release_dquot, 805 .mark_dirty = ext4_mark_dquot_dirty, 806 .write_info = ext4_write_info 807 }; 808 809 static struct quotactl_ops ext4_qctl_operations = { 810 .quota_on = ext4_quota_on, 811 .quota_off = vfs_quota_off, 812 .quota_sync = vfs_quota_sync, 813 .get_info = vfs_get_dqinfo, 814 .set_info = vfs_set_dqinfo, 815 .get_dqblk = vfs_get_dqblk, 816 .set_dqblk = vfs_set_dqblk 817 }; 818 #endif 819 820 static const struct super_operations ext4_sops = { 821 .alloc_inode = ext4_alloc_inode, 822 .destroy_inode = ext4_destroy_inode, 823 .write_inode = ext4_write_inode, 824 .dirty_inode = ext4_dirty_inode, 825 .delete_inode = ext4_delete_inode, 826 .put_super = ext4_put_super, 827 .write_super = ext4_write_super, 828 .sync_fs = ext4_sync_fs, 829 .write_super_lockfs = ext4_write_super_lockfs, 830 .unlockfs = ext4_unlockfs, 831 .statfs = ext4_statfs, 832 .remount_fs = ext4_remount, 833 .clear_inode = ext4_clear_inode, 834 .show_options = ext4_show_options, 835 #ifdef CONFIG_QUOTA 836 .quota_read = ext4_quota_read, 837 .quota_write = ext4_quota_write, 838 #endif 839 }; 840 841 static const struct export_operations ext4_export_ops = { 842 .fh_to_dentry = ext4_fh_to_dentry, 843 .fh_to_parent = ext4_fh_to_parent, 844 .get_parent = ext4_get_parent, 845 }; 846 847 enum { 848 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 849 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 850 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 851 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 852 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 853 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 854 Opt_journal_checksum, Opt_journal_async_commit, 855 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 856 Opt_data_err_abort, Opt_data_err_ignore, 857 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 858 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 859 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 860 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 861 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 862 Opt_inode_readahead_blks 863 }; 864 865 static const match_table_t tokens = { 866 {Opt_bsd_df, "bsddf"}, 867 {Opt_minix_df, "minixdf"}, 868 {Opt_grpid, "grpid"}, 869 {Opt_grpid, "bsdgroups"}, 870 {Opt_nogrpid, "nogrpid"}, 871 {Opt_nogrpid, "sysvgroups"}, 872 {Opt_resgid, "resgid=%u"}, 873 {Opt_resuid, "resuid=%u"}, 874 {Opt_sb, "sb=%u"}, 875 {Opt_err_cont, "errors=continue"}, 876 {Opt_err_panic, "errors=panic"}, 877 {Opt_err_ro, "errors=remount-ro"}, 878 {Opt_nouid32, "nouid32"}, 879 {Opt_debug, "debug"}, 880 {Opt_oldalloc, "oldalloc"}, 881 {Opt_orlov, "orlov"}, 882 {Opt_user_xattr, "user_xattr"}, 883 {Opt_nouser_xattr, "nouser_xattr"}, 884 {Opt_acl, "acl"}, 885 {Opt_noacl, "noacl"}, 886 {Opt_reservation, "reservation"}, 887 {Opt_noreservation, "noreservation"}, 888 {Opt_noload, "noload"}, 889 {Opt_nobh, "nobh"}, 890 {Opt_bh, "bh"}, 891 {Opt_commit, "commit=%u"}, 892 {Opt_journal_update, "journal=update"}, 893 {Opt_journal_inum, "journal=%u"}, 894 {Opt_journal_dev, "journal_dev=%u"}, 895 {Opt_journal_checksum, "journal_checksum"}, 896 {Opt_journal_async_commit, "journal_async_commit"}, 897 {Opt_abort, "abort"}, 898 {Opt_data_journal, "data=journal"}, 899 {Opt_data_ordered, "data=ordered"}, 900 {Opt_data_writeback, "data=writeback"}, 901 {Opt_data_err_abort, "data_err=abort"}, 902 {Opt_data_err_ignore, "data_err=ignore"}, 903 {Opt_offusrjquota, "usrjquota="}, 904 {Opt_usrjquota, "usrjquota=%s"}, 905 {Opt_offgrpjquota, "grpjquota="}, 906 {Opt_grpjquota, "grpjquota=%s"}, 907 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 908 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 909 {Opt_grpquota, "grpquota"}, 910 {Opt_noquota, "noquota"}, 911 {Opt_quota, "quota"}, 912 {Opt_usrquota, "usrquota"}, 913 {Opt_barrier, "barrier=%u"}, 914 {Opt_extents, "extents"}, 915 {Opt_noextents, "noextents"}, 916 {Opt_i_version, "i_version"}, 917 {Opt_stripe, "stripe=%u"}, 918 {Opt_resize, "resize"}, 919 {Opt_delalloc, "delalloc"}, 920 {Opt_nodelalloc, "nodelalloc"}, 921 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 922 {Opt_err, NULL}, 923 }; 924 925 static ext4_fsblk_t get_sb_block(void **data) 926 { 927 ext4_fsblk_t sb_block; 928 char *options = (char *) *data; 929 930 if (!options || strncmp(options, "sb=", 3) != 0) 931 return 1; /* Default location */ 932 options += 3; 933 /*todo: use simple_strtoll with >32bit ext4 */ 934 sb_block = simple_strtoul(options, &options, 0); 935 if (*options && *options != ',') { 936 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 937 (char *) *data); 938 return 1; 939 } 940 if (*options == ',') 941 options++; 942 *data = (void *) options; 943 return sb_block; 944 } 945 946 static int parse_options(char *options, struct super_block *sb, 947 unsigned int *inum, unsigned long *journal_devnum, 948 ext4_fsblk_t *n_blocks_count, int is_remount) 949 { 950 struct ext4_sb_info *sbi = EXT4_SB(sb); 951 char *p; 952 substring_t args[MAX_OPT_ARGS]; 953 int data_opt = 0; 954 int option; 955 #ifdef CONFIG_QUOTA 956 int qtype, qfmt; 957 char *qname; 958 #endif 959 ext4_fsblk_t last_block; 960 961 if (!options) 962 return 1; 963 964 while ((p = strsep(&options, ",")) != NULL) { 965 int token; 966 if (!*p) 967 continue; 968 969 token = match_token(p, tokens, args); 970 switch (token) { 971 case Opt_bsd_df: 972 clear_opt(sbi->s_mount_opt, MINIX_DF); 973 break; 974 case Opt_minix_df: 975 set_opt(sbi->s_mount_opt, MINIX_DF); 976 break; 977 case Opt_grpid: 978 set_opt(sbi->s_mount_opt, GRPID); 979 break; 980 case Opt_nogrpid: 981 clear_opt(sbi->s_mount_opt, GRPID); 982 break; 983 case Opt_resuid: 984 if (match_int(&args[0], &option)) 985 return 0; 986 sbi->s_resuid = option; 987 break; 988 case Opt_resgid: 989 if (match_int(&args[0], &option)) 990 return 0; 991 sbi->s_resgid = option; 992 break; 993 case Opt_sb: 994 /* handled by get_sb_block() instead of here */ 995 /* *sb_block = match_int(&args[0]); */ 996 break; 997 case Opt_err_panic: 998 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 999 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1000 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1001 break; 1002 case Opt_err_ro: 1003 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1004 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1005 set_opt(sbi->s_mount_opt, ERRORS_RO); 1006 break; 1007 case Opt_err_cont: 1008 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1009 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1010 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1011 break; 1012 case Opt_nouid32: 1013 set_opt(sbi->s_mount_opt, NO_UID32); 1014 break; 1015 case Opt_debug: 1016 set_opt(sbi->s_mount_opt, DEBUG); 1017 break; 1018 case Opt_oldalloc: 1019 set_opt(sbi->s_mount_opt, OLDALLOC); 1020 break; 1021 case Opt_orlov: 1022 clear_opt(sbi->s_mount_opt, OLDALLOC); 1023 break; 1024 #ifdef CONFIG_EXT4_FS_XATTR 1025 case Opt_user_xattr: 1026 set_opt(sbi->s_mount_opt, XATTR_USER); 1027 break; 1028 case Opt_nouser_xattr: 1029 clear_opt(sbi->s_mount_opt, XATTR_USER); 1030 break; 1031 #else 1032 case Opt_user_xattr: 1033 case Opt_nouser_xattr: 1034 printk(KERN_ERR "EXT4 (no)user_xattr options " 1035 "not supported\n"); 1036 break; 1037 #endif 1038 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1039 case Opt_acl: 1040 set_opt(sbi->s_mount_opt, POSIX_ACL); 1041 break; 1042 case Opt_noacl: 1043 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1044 break; 1045 #else 1046 case Opt_acl: 1047 case Opt_noacl: 1048 printk(KERN_ERR "EXT4 (no)acl options " 1049 "not supported\n"); 1050 break; 1051 #endif 1052 case Opt_reservation: 1053 set_opt(sbi->s_mount_opt, RESERVATION); 1054 break; 1055 case Opt_noreservation: 1056 clear_opt(sbi->s_mount_opt, RESERVATION); 1057 break; 1058 case Opt_journal_update: 1059 /* @@@ FIXME */ 1060 /* Eventually we will want to be able to create 1061 a journal file here. For now, only allow the 1062 user to specify an existing inode to be the 1063 journal file. */ 1064 if (is_remount) { 1065 printk(KERN_ERR "EXT4-fs: cannot specify " 1066 "journal on remount\n"); 1067 return 0; 1068 } 1069 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1070 break; 1071 case Opt_journal_inum: 1072 if (is_remount) { 1073 printk(KERN_ERR "EXT4-fs: cannot specify " 1074 "journal on remount\n"); 1075 return 0; 1076 } 1077 if (match_int(&args[0], &option)) 1078 return 0; 1079 *inum = option; 1080 break; 1081 case Opt_journal_dev: 1082 if (is_remount) { 1083 printk(KERN_ERR "EXT4-fs: cannot specify " 1084 "journal on remount\n"); 1085 return 0; 1086 } 1087 if (match_int(&args[0], &option)) 1088 return 0; 1089 *journal_devnum = option; 1090 break; 1091 case Opt_journal_checksum: 1092 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1093 break; 1094 case Opt_journal_async_commit: 1095 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1096 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1097 break; 1098 case Opt_noload: 1099 set_opt(sbi->s_mount_opt, NOLOAD); 1100 break; 1101 case Opt_commit: 1102 if (match_int(&args[0], &option)) 1103 return 0; 1104 if (option < 0) 1105 return 0; 1106 if (option == 0) 1107 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1108 sbi->s_commit_interval = HZ * option; 1109 break; 1110 case Opt_data_journal: 1111 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1112 goto datacheck; 1113 case Opt_data_ordered: 1114 data_opt = EXT4_MOUNT_ORDERED_DATA; 1115 goto datacheck; 1116 case Opt_data_writeback: 1117 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1118 datacheck: 1119 if (is_remount) { 1120 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 1121 != data_opt) { 1122 printk(KERN_ERR 1123 "EXT4-fs: cannot change data " 1124 "mode on remount\n"); 1125 return 0; 1126 } 1127 } else { 1128 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 1129 sbi->s_mount_opt |= data_opt; 1130 } 1131 break; 1132 case Opt_data_err_abort: 1133 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1134 break; 1135 case Opt_data_err_ignore: 1136 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1137 break; 1138 #ifdef CONFIG_QUOTA 1139 case Opt_usrjquota: 1140 qtype = USRQUOTA; 1141 goto set_qf_name; 1142 case Opt_grpjquota: 1143 qtype = GRPQUOTA; 1144 set_qf_name: 1145 if ((sb_any_quota_enabled(sb) || 1146 sb_any_quota_suspended(sb)) && 1147 !sbi->s_qf_names[qtype]) { 1148 printk(KERN_ERR 1149 "EXT4-fs: Cannot change journaled " 1150 "quota options when quota turned on.\n"); 1151 return 0; 1152 } 1153 qname = match_strdup(&args[0]); 1154 if (!qname) { 1155 printk(KERN_ERR 1156 "EXT4-fs: not enough memory for " 1157 "storing quotafile name.\n"); 1158 return 0; 1159 } 1160 if (sbi->s_qf_names[qtype] && 1161 strcmp(sbi->s_qf_names[qtype], qname)) { 1162 printk(KERN_ERR 1163 "EXT4-fs: %s quota file already " 1164 "specified.\n", QTYPE2NAME(qtype)); 1165 kfree(qname); 1166 return 0; 1167 } 1168 sbi->s_qf_names[qtype] = qname; 1169 if (strchr(sbi->s_qf_names[qtype], '/')) { 1170 printk(KERN_ERR 1171 "EXT4-fs: quotafile must be on " 1172 "filesystem root.\n"); 1173 kfree(sbi->s_qf_names[qtype]); 1174 sbi->s_qf_names[qtype] = NULL; 1175 return 0; 1176 } 1177 set_opt(sbi->s_mount_opt, QUOTA); 1178 break; 1179 case Opt_offusrjquota: 1180 qtype = USRQUOTA; 1181 goto clear_qf_name; 1182 case Opt_offgrpjquota: 1183 qtype = GRPQUOTA; 1184 clear_qf_name: 1185 if ((sb_any_quota_enabled(sb) || 1186 sb_any_quota_suspended(sb)) && 1187 sbi->s_qf_names[qtype]) { 1188 printk(KERN_ERR "EXT4-fs: Cannot change " 1189 "journaled quota options when " 1190 "quota turned on.\n"); 1191 return 0; 1192 } 1193 /* 1194 * The space will be released later when all options 1195 * are confirmed to be correct 1196 */ 1197 sbi->s_qf_names[qtype] = NULL; 1198 break; 1199 case Opt_jqfmt_vfsold: 1200 qfmt = QFMT_VFS_OLD; 1201 goto set_qf_format; 1202 case Opt_jqfmt_vfsv0: 1203 qfmt = QFMT_VFS_V0; 1204 set_qf_format: 1205 if ((sb_any_quota_enabled(sb) || 1206 sb_any_quota_suspended(sb)) && 1207 sbi->s_jquota_fmt != qfmt) { 1208 printk(KERN_ERR "EXT4-fs: Cannot change " 1209 "journaled quota options when " 1210 "quota turned on.\n"); 1211 return 0; 1212 } 1213 sbi->s_jquota_fmt = qfmt; 1214 break; 1215 case Opt_quota: 1216 case Opt_usrquota: 1217 set_opt(sbi->s_mount_opt, QUOTA); 1218 set_opt(sbi->s_mount_opt, USRQUOTA); 1219 break; 1220 case Opt_grpquota: 1221 set_opt(sbi->s_mount_opt, QUOTA); 1222 set_opt(sbi->s_mount_opt, GRPQUOTA); 1223 break; 1224 case Opt_noquota: 1225 if (sb_any_quota_enabled(sb)) { 1226 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1227 "options when quota turned on.\n"); 1228 return 0; 1229 } 1230 clear_opt(sbi->s_mount_opt, QUOTA); 1231 clear_opt(sbi->s_mount_opt, USRQUOTA); 1232 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1233 break; 1234 #else 1235 case Opt_quota: 1236 case Opt_usrquota: 1237 case Opt_grpquota: 1238 printk(KERN_ERR 1239 "EXT4-fs: quota options not supported.\n"); 1240 break; 1241 case Opt_usrjquota: 1242 case Opt_grpjquota: 1243 case Opt_offusrjquota: 1244 case Opt_offgrpjquota: 1245 case Opt_jqfmt_vfsold: 1246 case Opt_jqfmt_vfsv0: 1247 printk(KERN_ERR 1248 "EXT4-fs: journaled quota options not " 1249 "supported.\n"); 1250 break; 1251 case Opt_noquota: 1252 break; 1253 #endif 1254 case Opt_abort: 1255 set_opt(sbi->s_mount_opt, ABORT); 1256 break; 1257 case Opt_barrier: 1258 if (match_int(&args[0], &option)) 1259 return 0; 1260 if (option) 1261 set_opt(sbi->s_mount_opt, BARRIER); 1262 else 1263 clear_opt(sbi->s_mount_opt, BARRIER); 1264 break; 1265 case Opt_ignore: 1266 break; 1267 case Opt_resize: 1268 if (!is_remount) { 1269 printk("EXT4-fs: resize option only available " 1270 "for remount\n"); 1271 return 0; 1272 } 1273 if (match_int(&args[0], &option) != 0) 1274 return 0; 1275 *n_blocks_count = option; 1276 break; 1277 case Opt_nobh: 1278 set_opt(sbi->s_mount_opt, NOBH); 1279 break; 1280 case Opt_bh: 1281 clear_opt(sbi->s_mount_opt, NOBH); 1282 break; 1283 case Opt_extents: 1284 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, 1285 EXT4_FEATURE_INCOMPAT_EXTENTS)) { 1286 ext4_warning(sb, __func__, 1287 "extents feature not enabled " 1288 "on this filesystem, use tune2fs\n"); 1289 return 0; 1290 } 1291 set_opt(sbi->s_mount_opt, EXTENTS); 1292 break; 1293 case Opt_noextents: 1294 /* 1295 * When e2fsprogs support resizing an already existing 1296 * ext3 file system to greater than 2**32 we need to 1297 * add support to block allocator to handle growing 1298 * already existing block mapped inode so that blocks 1299 * allocated for them fall within 2**32 1300 */ 1301 last_block = ext4_blocks_count(sbi->s_es) - 1; 1302 if (last_block > 0xffffffffULL) { 1303 printk(KERN_ERR "EXT4-fs: Filesystem too " 1304 "large to mount with " 1305 "-o noextents options\n"); 1306 return 0; 1307 } 1308 clear_opt(sbi->s_mount_opt, EXTENTS); 1309 break; 1310 case Opt_i_version: 1311 set_opt(sbi->s_mount_opt, I_VERSION); 1312 sb->s_flags |= MS_I_VERSION; 1313 break; 1314 case Opt_nodelalloc: 1315 clear_opt(sbi->s_mount_opt, DELALLOC); 1316 break; 1317 case Opt_stripe: 1318 if (match_int(&args[0], &option)) 1319 return 0; 1320 if (option < 0) 1321 return 0; 1322 sbi->s_stripe = option; 1323 break; 1324 case Opt_delalloc: 1325 set_opt(sbi->s_mount_opt, DELALLOC); 1326 break; 1327 case Opt_inode_readahead_blks: 1328 if (match_int(&args[0], &option)) 1329 return 0; 1330 if (option < 0 || option > (1 << 30)) 1331 return 0; 1332 sbi->s_inode_readahead_blks = option; 1333 break; 1334 default: 1335 printk(KERN_ERR 1336 "EXT4-fs: Unrecognized mount option \"%s\" " 1337 "or missing value\n", p); 1338 return 0; 1339 } 1340 } 1341 #ifdef CONFIG_QUOTA 1342 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1343 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1344 sbi->s_qf_names[USRQUOTA]) 1345 clear_opt(sbi->s_mount_opt, USRQUOTA); 1346 1347 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1348 sbi->s_qf_names[GRPQUOTA]) 1349 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1350 1351 if ((sbi->s_qf_names[USRQUOTA] && 1352 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1353 (sbi->s_qf_names[GRPQUOTA] && 1354 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1355 printk(KERN_ERR "EXT4-fs: old and new quota " 1356 "format mixing.\n"); 1357 return 0; 1358 } 1359 1360 if (!sbi->s_jquota_fmt) { 1361 printk(KERN_ERR "EXT4-fs: journaled quota format " 1362 "not specified.\n"); 1363 return 0; 1364 } 1365 } else { 1366 if (sbi->s_jquota_fmt) { 1367 printk(KERN_ERR "EXT4-fs: journaled quota format " 1368 "specified with no journaling " 1369 "enabled.\n"); 1370 return 0; 1371 } 1372 } 1373 #endif 1374 return 1; 1375 } 1376 1377 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1378 int read_only) 1379 { 1380 struct ext4_sb_info *sbi = EXT4_SB(sb); 1381 int res = 0; 1382 1383 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1384 printk(KERN_ERR "EXT4-fs warning: revision level too high, " 1385 "forcing read-only mode\n"); 1386 res = MS_RDONLY; 1387 } 1388 if (read_only) 1389 return res; 1390 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1391 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1392 "running e2fsck is recommended\n"); 1393 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1394 printk(KERN_WARNING 1395 "EXT4-fs warning: mounting fs with errors, " 1396 "running e2fsck is recommended\n"); 1397 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1398 le16_to_cpu(es->s_mnt_count) >= 1399 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1400 printk(KERN_WARNING 1401 "EXT4-fs warning: maximal mount count reached, " 1402 "running e2fsck is recommended\n"); 1403 else if (le32_to_cpu(es->s_checkinterval) && 1404 (le32_to_cpu(es->s_lastcheck) + 1405 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1406 printk(KERN_WARNING 1407 "EXT4-fs warning: checktime reached, " 1408 "running e2fsck is recommended\n"); 1409 #if 0 1410 /* @@@ We _will_ want to clear the valid bit if we find 1411 * inconsistencies, to force a fsck at reboot. But for 1412 * a plain journaled filesystem we can keep it set as 1413 * valid forever! :) 1414 */ 1415 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1416 #endif 1417 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1418 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1419 le16_add_cpu(&es->s_mnt_count, 1); 1420 es->s_mtime = cpu_to_le32(get_seconds()); 1421 ext4_update_dynamic_rev(sb); 1422 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1423 1424 ext4_commit_super(sb, es, 1); 1425 if (test_opt(sb, DEBUG)) 1426 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " 1427 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1428 sb->s_blocksize, 1429 sbi->s_groups_count, 1430 EXT4_BLOCKS_PER_GROUP(sb), 1431 EXT4_INODES_PER_GROUP(sb), 1432 sbi->s_mount_opt); 1433 1434 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", 1435 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : 1436 "external", EXT4_SB(sb)->s_journal->j_devname); 1437 return res; 1438 } 1439 1440 static int ext4_fill_flex_info(struct super_block *sb) 1441 { 1442 struct ext4_sb_info *sbi = EXT4_SB(sb); 1443 struct ext4_group_desc *gdp = NULL; 1444 struct buffer_head *bh; 1445 ext4_group_t flex_group_count; 1446 ext4_group_t flex_group; 1447 int groups_per_flex = 0; 1448 __u64 block_bitmap = 0; 1449 int i; 1450 1451 if (!sbi->s_es->s_log_groups_per_flex) { 1452 sbi->s_log_groups_per_flex = 0; 1453 return 1; 1454 } 1455 1456 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1457 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1458 1459 /* We allocate both existing and potentially added groups */ 1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1461 ((sbi->s_es->s_reserved_gdt_blocks +1 ) << 1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / 1463 groups_per_flex; 1464 sbi->s_flex_groups = kzalloc(flex_group_count * 1465 sizeof(struct flex_groups), GFP_KERNEL); 1466 if (sbi->s_flex_groups == NULL) { 1467 printk(KERN_ERR "EXT4-fs: not enough memory for " 1468 "%lu flex groups\n", flex_group_count); 1469 goto failed; 1470 } 1471 1472 gdp = ext4_get_group_desc(sb, 1, &bh); 1473 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1474 1475 for (i = 0; i < sbi->s_groups_count; i++) { 1476 gdp = ext4_get_group_desc(sb, i, &bh); 1477 1478 flex_group = ext4_flex_group(sbi, i); 1479 sbi->s_flex_groups[flex_group].free_inodes += 1480 le16_to_cpu(gdp->bg_free_inodes_count); 1481 sbi->s_flex_groups[flex_group].free_blocks += 1482 le16_to_cpu(gdp->bg_free_blocks_count); 1483 } 1484 1485 return 1; 1486 failed: 1487 return 0; 1488 } 1489 1490 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1491 struct ext4_group_desc *gdp) 1492 { 1493 __u16 crc = 0; 1494 1495 if (sbi->s_es->s_feature_ro_compat & 1496 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1497 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1498 __le32 le_group = cpu_to_le32(block_group); 1499 1500 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1501 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1502 crc = crc16(crc, (__u8 *)gdp, offset); 1503 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1504 /* for checksum of struct ext4_group_desc do the rest...*/ 1505 if ((sbi->s_es->s_feature_incompat & 1506 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1507 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1508 crc = crc16(crc, (__u8 *)gdp + offset, 1509 le16_to_cpu(sbi->s_es->s_desc_size) - 1510 offset); 1511 } 1512 1513 return cpu_to_le16(crc); 1514 } 1515 1516 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1517 struct ext4_group_desc *gdp) 1518 { 1519 if ((sbi->s_es->s_feature_ro_compat & 1520 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1521 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1522 return 0; 1523 1524 return 1; 1525 } 1526 1527 /* Called at mount-time, super-block is locked */ 1528 static int ext4_check_descriptors(struct super_block *sb) 1529 { 1530 struct ext4_sb_info *sbi = EXT4_SB(sb); 1531 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1532 ext4_fsblk_t last_block; 1533 ext4_fsblk_t block_bitmap; 1534 ext4_fsblk_t inode_bitmap; 1535 ext4_fsblk_t inode_table; 1536 int flexbg_flag = 0; 1537 ext4_group_t i; 1538 1539 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1540 flexbg_flag = 1; 1541 1542 ext4_debug("Checking group descriptors"); 1543 1544 for (i = 0; i < sbi->s_groups_count; i++) { 1545 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1546 1547 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1548 last_block = ext4_blocks_count(sbi->s_es) - 1; 1549 else 1550 last_block = first_block + 1551 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1552 1553 block_bitmap = ext4_block_bitmap(sb, gdp); 1554 if (block_bitmap < first_block || block_bitmap > last_block) { 1555 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1556 "Block bitmap for group %lu not in group " 1557 "(block %llu)!\n", i, block_bitmap); 1558 return 0; 1559 } 1560 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1561 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1562 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1563 "Inode bitmap for group %lu not in group " 1564 "(block %llu)!\n", i, inode_bitmap); 1565 return 0; 1566 } 1567 inode_table = ext4_inode_table(sb, gdp); 1568 if (inode_table < first_block || 1569 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1570 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1571 "Inode table for group %lu not in group " 1572 "(block %llu)!\n", i, inode_table); 1573 return 0; 1574 } 1575 spin_lock(sb_bgl_lock(sbi, i)); 1576 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1577 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1578 "Checksum for group %lu failed (%u!=%u)\n", 1579 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1580 gdp)), le16_to_cpu(gdp->bg_checksum)); 1581 if (!(sb->s_flags & MS_RDONLY)) { 1582 spin_unlock(sb_bgl_lock(sbi, i)); 1583 return 0; 1584 } 1585 } 1586 spin_unlock(sb_bgl_lock(sbi, i)); 1587 if (!flexbg_flag) 1588 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1589 } 1590 1591 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1592 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 1593 return 1; 1594 } 1595 1596 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1597 * the superblock) which were deleted from all directories, but held open by 1598 * a process at the time of a crash. We walk the list and try to delete these 1599 * inodes at recovery time (only with a read-write filesystem). 1600 * 1601 * In order to keep the orphan inode chain consistent during traversal (in 1602 * case of crash during recovery), we link each inode into the superblock 1603 * orphan list_head and handle it the same way as an inode deletion during 1604 * normal operation (which journals the operations for us). 1605 * 1606 * We only do an iget() and an iput() on each inode, which is very safe if we 1607 * accidentally point at an in-use or already deleted inode. The worst that 1608 * can happen in this case is that we get a "bit already cleared" message from 1609 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1610 * e2fsck was run on this filesystem, and it must have already done the orphan 1611 * inode cleanup for us, so we can safely abort without any further action. 1612 */ 1613 static void ext4_orphan_cleanup(struct super_block *sb, 1614 struct ext4_super_block *es) 1615 { 1616 unsigned int s_flags = sb->s_flags; 1617 int nr_orphans = 0, nr_truncates = 0; 1618 #ifdef CONFIG_QUOTA 1619 int i; 1620 #endif 1621 if (!es->s_last_orphan) { 1622 jbd_debug(4, "no orphan inodes to clean up\n"); 1623 return; 1624 } 1625 1626 if (bdev_read_only(sb->s_bdev)) { 1627 printk(KERN_ERR "EXT4-fs: write access " 1628 "unavailable, skipping orphan cleanup.\n"); 1629 return; 1630 } 1631 1632 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1633 if (es->s_last_orphan) 1634 jbd_debug(1, "Errors on filesystem, " 1635 "clearing orphan list.\n"); 1636 es->s_last_orphan = 0; 1637 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1638 return; 1639 } 1640 1641 if (s_flags & MS_RDONLY) { 1642 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1643 sb->s_id); 1644 sb->s_flags &= ~MS_RDONLY; 1645 } 1646 #ifdef CONFIG_QUOTA 1647 /* Needed for iput() to work correctly and not trash data */ 1648 sb->s_flags |= MS_ACTIVE; 1649 /* Turn on quotas so that they are updated correctly */ 1650 for (i = 0; i < MAXQUOTAS; i++) { 1651 if (EXT4_SB(sb)->s_qf_names[i]) { 1652 int ret = ext4_quota_on_mount(sb, i); 1653 if (ret < 0) 1654 printk(KERN_ERR 1655 "EXT4-fs: Cannot turn on journaled " 1656 "quota: error %d\n", ret); 1657 } 1658 } 1659 #endif 1660 1661 while (es->s_last_orphan) { 1662 struct inode *inode; 1663 1664 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1665 if (IS_ERR(inode)) { 1666 es->s_last_orphan = 0; 1667 break; 1668 } 1669 1670 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1671 DQUOT_INIT(inode); 1672 if (inode->i_nlink) { 1673 printk(KERN_DEBUG 1674 "%s: truncating inode %lu to %lld bytes\n", 1675 __func__, inode->i_ino, inode->i_size); 1676 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 1677 inode->i_ino, inode->i_size); 1678 ext4_truncate(inode); 1679 nr_truncates++; 1680 } else { 1681 printk(KERN_DEBUG 1682 "%s: deleting unreferenced inode %lu\n", 1683 __func__, inode->i_ino); 1684 jbd_debug(2, "deleting unreferenced inode %lu\n", 1685 inode->i_ino); 1686 nr_orphans++; 1687 } 1688 iput(inode); /* The delete magic happens here! */ 1689 } 1690 1691 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 1692 1693 if (nr_orphans) 1694 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1695 sb->s_id, PLURAL(nr_orphans)); 1696 if (nr_truncates) 1697 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1698 sb->s_id, PLURAL(nr_truncates)); 1699 #ifdef CONFIG_QUOTA 1700 /* Turn quotas off */ 1701 for (i = 0; i < MAXQUOTAS; i++) { 1702 if (sb_dqopt(sb)->files[i]) 1703 vfs_quota_off(sb, i, 0); 1704 } 1705 #endif 1706 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1707 } 1708 /* 1709 * Maximal extent format file size. 1710 * Resulting logical blkno at s_maxbytes must fit in our on-disk 1711 * extent format containers, within a sector_t, and within i_blocks 1712 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 1713 * so that won't be a limiting factor. 1714 * 1715 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 1716 */ 1717 static loff_t ext4_max_size(int blkbits, int has_huge_files) 1718 { 1719 loff_t res; 1720 loff_t upper_limit = MAX_LFS_FILESIZE; 1721 1722 /* small i_blocks in vfs inode? */ 1723 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1724 /* 1725 * CONFIG_LSF is not enabled implies the inode 1726 * i_block represent total blocks in 512 bytes 1727 * 32 == size of vfs inode i_blocks * 8 1728 */ 1729 upper_limit = (1LL << 32) - 1; 1730 1731 /* total blocks in file system block size */ 1732 upper_limit >>= (blkbits - 9); 1733 upper_limit <<= blkbits; 1734 } 1735 1736 /* 32-bit extent-start container, ee_block */ 1737 res = 1LL << 32; 1738 res <<= blkbits; 1739 res -= 1; 1740 1741 /* Sanity check against vm- & vfs- imposed limits */ 1742 if (res > upper_limit) 1743 res = upper_limit; 1744 1745 return res; 1746 } 1747 1748 /* 1749 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 1750 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 1751 * We need to be 1 filesystem block less than the 2^48 sector limit. 1752 */ 1753 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 1754 { 1755 loff_t res = EXT4_NDIR_BLOCKS; 1756 int meta_blocks; 1757 loff_t upper_limit; 1758 /* This is calculated to be the largest file size for a 1759 * dense, bitmapped file such that the total number of 1760 * sectors in the file, including data and all indirect blocks, 1761 * does not exceed 2^48 -1 1762 * __u32 i_blocks_lo and _u16 i_blocks_high representing the 1763 * total number of 512 bytes blocks of the file 1764 */ 1765 1766 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1767 /* 1768 * !has_huge_files or CONFIG_LSF is not enabled 1769 * implies the inode i_block represent total blocks in 1770 * 512 bytes 32 == size of vfs inode i_blocks * 8 1771 */ 1772 upper_limit = (1LL << 32) - 1; 1773 1774 /* total blocks in file system block size */ 1775 upper_limit >>= (bits - 9); 1776 1777 } else { 1778 /* 1779 * We use 48 bit ext4_inode i_blocks 1780 * With EXT4_HUGE_FILE_FL set the i_blocks 1781 * represent total number of blocks in 1782 * file system block size 1783 */ 1784 upper_limit = (1LL << 48) - 1; 1785 1786 } 1787 1788 /* indirect blocks */ 1789 meta_blocks = 1; 1790 /* double indirect blocks */ 1791 meta_blocks += 1 + (1LL << (bits-2)); 1792 /* tripple indirect blocks */ 1793 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1794 1795 upper_limit -= meta_blocks; 1796 upper_limit <<= bits; 1797 1798 res += 1LL << (bits-2); 1799 res += 1LL << (2*(bits-2)); 1800 res += 1LL << (3*(bits-2)); 1801 res <<= bits; 1802 if (res > upper_limit) 1803 res = upper_limit; 1804 1805 if (res > MAX_LFS_FILESIZE) 1806 res = MAX_LFS_FILESIZE; 1807 1808 return res; 1809 } 1810 1811 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 1812 ext4_fsblk_t logical_sb_block, int nr) 1813 { 1814 struct ext4_sb_info *sbi = EXT4_SB(sb); 1815 ext4_group_t bg, first_meta_bg; 1816 int has_super = 0; 1817 1818 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1819 1820 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 1821 nr < first_meta_bg) 1822 return logical_sb_block + nr + 1; 1823 bg = sbi->s_desc_per_block * nr; 1824 if (ext4_bg_has_super(sb, bg)) 1825 has_super = 1; 1826 return (has_super + ext4_group_first_block_no(sb, bg)); 1827 } 1828 1829 /** 1830 * ext4_get_stripe_size: Get the stripe size. 1831 * @sbi: In memory super block info 1832 * 1833 * If we have specified it via mount option, then 1834 * use the mount option value. If the value specified at mount time is 1835 * greater than the blocks per group use the super block value. 1836 * If the super block value is greater than blocks per group return 0. 1837 * Allocator needs it be less than blocks per group. 1838 * 1839 */ 1840 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 1841 { 1842 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 1843 unsigned long stripe_width = 1844 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 1845 1846 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 1847 return sbi->s_stripe; 1848 1849 if (stripe_width <= sbi->s_blocks_per_group) 1850 return stripe_width; 1851 1852 if (stride <= sbi->s_blocks_per_group) 1853 return stride; 1854 1855 return 0; 1856 } 1857 1858 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 1859 __releases(kernel_lock) 1860 __acquires(kernel_lock) 1861 1862 { 1863 struct buffer_head *bh; 1864 struct ext4_super_block *es = NULL; 1865 struct ext4_sb_info *sbi; 1866 ext4_fsblk_t block; 1867 ext4_fsblk_t sb_block = get_sb_block(&data); 1868 ext4_fsblk_t logical_sb_block; 1869 unsigned long offset = 0; 1870 unsigned int journal_inum = 0; 1871 unsigned long journal_devnum = 0; 1872 unsigned long def_mount_opts; 1873 struct inode *root; 1874 char *cp; 1875 int ret = -EINVAL; 1876 int blocksize; 1877 int db_count; 1878 int i; 1879 int needs_recovery, has_huge_files; 1880 __le32 features; 1881 __u64 blocks_count; 1882 int err; 1883 1884 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1885 if (!sbi) 1886 return -ENOMEM; 1887 sb->s_fs_info = sbi; 1888 sbi->s_mount_opt = 0; 1889 sbi->s_resuid = EXT4_DEF_RESUID; 1890 sbi->s_resgid = EXT4_DEF_RESGID; 1891 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 1892 sbi->s_sb_block = sb_block; 1893 1894 unlock_kernel(); 1895 1896 /* Cleanup superblock name */ 1897 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 1898 *cp = '!'; 1899 1900 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1901 if (!blocksize) { 1902 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1903 goto out_fail; 1904 } 1905 1906 /* 1907 * The ext4 superblock will not be buffer aligned for other than 1kB 1908 * block sizes. We need to calculate the offset from buffer start. 1909 */ 1910 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 1911 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1912 offset = do_div(logical_sb_block, blocksize); 1913 } else { 1914 logical_sb_block = sb_block; 1915 } 1916 1917 if (!(bh = sb_bread(sb, logical_sb_block))) { 1918 printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); 1919 goto out_fail; 1920 } 1921 /* 1922 * Note: s_es must be initialized as soon as possible because 1923 * some ext4 macro-instructions depend on its value 1924 */ 1925 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 1926 sbi->s_es = es; 1927 sb->s_magic = le16_to_cpu(es->s_magic); 1928 if (sb->s_magic != EXT4_SUPER_MAGIC) 1929 goto cantfind_ext4; 1930 1931 /* Set defaults before we parse the mount options */ 1932 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1933 if (def_mount_opts & EXT4_DEFM_DEBUG) 1934 set_opt(sbi->s_mount_opt, DEBUG); 1935 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 1936 set_opt(sbi->s_mount_opt, GRPID); 1937 if (def_mount_opts & EXT4_DEFM_UID16) 1938 set_opt(sbi->s_mount_opt, NO_UID32); 1939 #ifdef CONFIG_EXT4_FS_XATTR 1940 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1941 set_opt(sbi->s_mount_opt, XATTR_USER); 1942 #endif 1943 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1944 if (def_mount_opts & EXT4_DEFM_ACL) 1945 set_opt(sbi->s_mount_opt, POSIX_ACL); 1946 #endif 1947 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 1948 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 1949 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 1950 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 1951 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 1952 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 1953 1954 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 1955 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1956 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 1957 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1958 else 1959 set_opt(sbi->s_mount_opt, ERRORS_RO); 1960 1961 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1962 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1963 1964 set_opt(sbi->s_mount_opt, RESERVATION); 1965 set_opt(sbi->s_mount_opt, BARRIER); 1966 1967 /* 1968 * turn on extents feature by default in ext4 filesystem 1969 * only if feature flag already set by mkfs or tune2fs. 1970 * Use -o noextents to turn it off 1971 */ 1972 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 1973 set_opt(sbi->s_mount_opt, EXTENTS); 1974 else 1975 ext4_warning(sb, __func__, 1976 "extents feature not enabled on this filesystem, " 1977 "use tune2fs.\n"); 1978 1979 /* 1980 * enable delayed allocation by default 1981 * Use -o nodelalloc to turn it off 1982 */ 1983 set_opt(sbi->s_mount_opt, DELALLOC); 1984 1985 1986 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, 1987 NULL, 0)) 1988 goto failed_mount; 1989 1990 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1991 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1992 1993 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 1994 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 1995 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1996 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1997 printk(KERN_WARNING 1998 "EXT4-fs warning: feature flags set on rev 0 fs, " 1999 "running e2fsck is recommended\n"); 2000 2001 /* 2002 * Check feature flags regardless of the revision level, since we 2003 * previously didn't change the revision level when setting the flags, 2004 * so there is a chance incompat flags are set on a rev 0 filesystem. 2005 */ 2006 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 2007 if (features) { 2008 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 2009 "unsupported optional features (%x).\n", 2010 sb->s_id, le32_to_cpu(features)); 2011 goto failed_mount; 2012 } 2013 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 2014 if (!(sb->s_flags & MS_RDONLY) && features) { 2015 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 2016 "unsupported optional features (%x).\n", 2017 sb->s_id, le32_to_cpu(features)); 2018 goto failed_mount; 2019 } 2020 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2021 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2022 if (has_huge_files) { 2023 /* 2024 * Large file size enabled file system can only be 2025 * mount if kernel is build with CONFIG_LSF 2026 */ 2027 if (sizeof(root->i_blocks) < sizeof(u64) && 2028 !(sb->s_flags & MS_RDONLY)) { 2029 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2030 "files cannot be mounted read-write " 2031 "without CONFIG_LSF.\n", sb->s_id); 2032 goto failed_mount; 2033 } 2034 } 2035 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2036 2037 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2038 blocksize > EXT4_MAX_BLOCK_SIZE) { 2039 printk(KERN_ERR 2040 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 2041 blocksize, sb->s_id); 2042 goto failed_mount; 2043 } 2044 2045 if (sb->s_blocksize != blocksize) { 2046 2047 /* Validate the filesystem blocksize */ 2048 if (!sb_set_blocksize(sb, blocksize)) { 2049 printk(KERN_ERR "EXT4-fs: bad block size %d.\n", 2050 blocksize); 2051 goto failed_mount; 2052 } 2053 2054 brelse(bh); 2055 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2056 offset = do_div(logical_sb_block, blocksize); 2057 bh = sb_bread(sb, logical_sb_block); 2058 if (!bh) { 2059 printk(KERN_ERR 2060 "EXT4-fs: Can't read superblock on 2nd try.\n"); 2061 goto failed_mount; 2062 } 2063 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2064 sbi->s_es = es; 2065 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2066 printk(KERN_ERR 2067 "EXT4-fs: Magic mismatch, very weird !\n"); 2068 goto failed_mount; 2069 } 2070 } 2071 2072 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2073 has_huge_files); 2074 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2075 2076 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2077 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2078 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2079 } else { 2080 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2081 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2082 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2083 (!is_power_of_2(sbi->s_inode_size)) || 2084 (sbi->s_inode_size > blocksize)) { 2085 printk(KERN_ERR 2086 "EXT4-fs: unsupported inode size: %d\n", 2087 sbi->s_inode_size); 2088 goto failed_mount; 2089 } 2090 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2091 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2092 } 2093 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2094 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2095 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2096 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2097 !is_power_of_2(sbi->s_desc_size)) { 2098 printk(KERN_ERR 2099 "EXT4-fs: unsupported descriptor size %lu\n", 2100 sbi->s_desc_size); 2101 goto failed_mount; 2102 } 2103 } else 2104 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2105 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2106 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2107 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2108 goto cantfind_ext4; 2109 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2110 if (sbi->s_inodes_per_block == 0) 2111 goto cantfind_ext4; 2112 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2113 sbi->s_inodes_per_block; 2114 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2115 sbi->s_sbh = bh; 2116 sbi->s_mount_state = le16_to_cpu(es->s_state); 2117 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2118 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2119 for (i = 0; i < 4; i++) 2120 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2121 sbi->s_def_hash_version = es->s_def_hash_version; 2122 2123 if (sbi->s_blocks_per_group > blocksize * 8) { 2124 printk(KERN_ERR 2125 "EXT4-fs: #blocks per group too big: %lu\n", 2126 sbi->s_blocks_per_group); 2127 goto failed_mount; 2128 } 2129 if (sbi->s_inodes_per_group > blocksize * 8) { 2130 printk(KERN_ERR 2131 "EXT4-fs: #inodes per group too big: %lu\n", 2132 sbi->s_inodes_per_group); 2133 goto failed_mount; 2134 } 2135 2136 if (ext4_blocks_count(es) > 2137 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 2138 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 2139 " too large to mount safely\n", sb->s_id); 2140 if (sizeof(sector_t) < 8) 2141 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 2142 "enabled\n"); 2143 goto failed_mount; 2144 } 2145 2146 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2147 goto cantfind_ext4; 2148 2149 /* ensure blocks_count calculation below doesn't sign-extend */ 2150 if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < 2151 le32_to_cpu(es->s_first_data_block) + 1) { 2152 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " 2153 "first data block %u, blocks per group %lu\n", 2154 ext4_blocks_count(es), 2155 le32_to_cpu(es->s_first_data_block), 2156 EXT4_BLOCKS_PER_GROUP(sb)); 2157 goto failed_mount; 2158 } 2159 blocks_count = (ext4_blocks_count(es) - 2160 le32_to_cpu(es->s_first_data_block) + 2161 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2162 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2163 sbi->s_groups_count = blocks_count; 2164 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2165 EXT4_DESC_PER_BLOCK(sb); 2166 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2167 GFP_KERNEL); 2168 if (sbi->s_group_desc == NULL) { 2169 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 2170 goto failed_mount; 2171 } 2172 2173 #ifdef CONFIG_PROC_FS 2174 if (ext4_proc_root) 2175 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2176 2177 if (sbi->s_proc) 2178 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, 2179 &ext4_ui_proc_fops, 2180 &sbi->s_inode_readahead_blks); 2181 #endif 2182 2183 bgl_lock_init(&sbi->s_blockgroup_lock); 2184 2185 for (i = 0; i < db_count; i++) { 2186 block = descriptor_loc(sb, logical_sb_block, i); 2187 sbi->s_group_desc[i] = sb_bread(sb, block); 2188 if (!sbi->s_group_desc[i]) { 2189 printk(KERN_ERR "EXT4-fs: " 2190 "can't read group descriptor %d\n", i); 2191 db_count = i; 2192 goto failed_mount2; 2193 } 2194 } 2195 if (!ext4_check_descriptors(sb)) { 2196 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2197 goto failed_mount2; 2198 } 2199 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2200 if (!ext4_fill_flex_info(sb)) { 2201 printk(KERN_ERR 2202 "EXT4-fs: unable to initialize " 2203 "flex_bg meta info!\n"); 2204 goto failed_mount2; 2205 } 2206 2207 sbi->s_gdb_count = db_count; 2208 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2209 spin_lock_init(&sbi->s_next_gen_lock); 2210 2211 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2212 ext4_count_free_blocks(sb)); 2213 if (!err) { 2214 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2215 ext4_count_free_inodes(sb)); 2216 } 2217 if (!err) { 2218 err = percpu_counter_init(&sbi->s_dirs_counter, 2219 ext4_count_dirs(sb)); 2220 } 2221 if (!err) { 2222 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2223 } 2224 if (err) { 2225 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2226 goto failed_mount3; 2227 } 2228 2229 sbi->s_stripe = ext4_get_stripe_size(sbi); 2230 2231 /* 2232 * set up enough so that it can read an inode 2233 */ 2234 sb->s_op = &ext4_sops; 2235 sb->s_export_op = &ext4_export_ops; 2236 sb->s_xattr = ext4_xattr_handlers; 2237 #ifdef CONFIG_QUOTA 2238 sb->s_qcop = &ext4_qctl_operations; 2239 sb->dq_op = &ext4_quota_operations; 2240 #endif 2241 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2242 2243 sb->s_root = NULL; 2244 2245 needs_recovery = (es->s_last_orphan != 0 || 2246 EXT4_HAS_INCOMPAT_FEATURE(sb, 2247 EXT4_FEATURE_INCOMPAT_RECOVER)); 2248 2249 /* 2250 * The first inode we look at is the journal inode. Don't try 2251 * root first: it may be modified in the journal! 2252 */ 2253 if (!test_opt(sb, NOLOAD) && 2254 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2255 if (ext4_load_journal(sb, es, journal_devnum)) 2256 goto failed_mount3; 2257 if (!(sb->s_flags & MS_RDONLY) && 2258 EXT4_SB(sb)->s_journal->j_failed_commit) { 2259 printk(KERN_CRIT "EXT4-fs error (device %s): " 2260 "ext4_fill_super: Journal transaction " 2261 "%u is corrupt\n", sb->s_id, 2262 EXT4_SB(sb)->s_journal->j_failed_commit); 2263 if (test_opt(sb, ERRORS_RO)) { 2264 printk(KERN_CRIT 2265 "Mounting filesystem read-only\n"); 2266 sb->s_flags |= MS_RDONLY; 2267 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2268 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2269 } 2270 if (test_opt(sb, ERRORS_PANIC)) { 2271 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2272 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2273 ext4_commit_super(sb, es, 1); 2274 printk(KERN_CRIT 2275 "EXT4-fs (device %s): mount failed\n", 2276 sb->s_id); 2277 goto failed_mount4; 2278 } 2279 } 2280 } else if (journal_inum) { 2281 if (ext4_create_journal(sb, es, journal_inum)) 2282 goto failed_mount3; 2283 } else { 2284 if (!silent) 2285 printk(KERN_ERR 2286 "ext4: No journal on filesystem on %s\n", 2287 sb->s_id); 2288 goto failed_mount3; 2289 } 2290 2291 if (ext4_blocks_count(es) > 0xffffffffULL && 2292 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2293 JBD2_FEATURE_INCOMPAT_64BIT)) { 2294 printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n"); 2295 goto failed_mount4; 2296 } 2297 2298 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2299 jbd2_journal_set_features(sbi->s_journal, 2300 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2301 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2302 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2303 jbd2_journal_set_features(sbi->s_journal, 2304 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2305 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2306 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2307 } else { 2308 jbd2_journal_clear_features(sbi->s_journal, 2309 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2310 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2311 } 2312 2313 /* We have now updated the journal if required, so we can 2314 * validate the data journaling mode. */ 2315 switch (test_opt(sb, DATA_FLAGS)) { 2316 case 0: 2317 /* No mode set, assume a default based on the journal 2318 * capabilities: ORDERED_DATA if the journal can 2319 * cope, else JOURNAL_DATA 2320 */ 2321 if (jbd2_journal_check_available_features 2322 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2323 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2324 else 2325 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2326 break; 2327 2328 case EXT4_MOUNT_ORDERED_DATA: 2329 case EXT4_MOUNT_WRITEBACK_DATA: 2330 if (!jbd2_journal_check_available_features 2331 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2332 printk(KERN_ERR "EXT4-fs: Journal does not support " 2333 "requested data journaling mode\n"); 2334 goto failed_mount4; 2335 } 2336 default: 2337 break; 2338 } 2339 2340 if (test_opt(sb, NOBH)) { 2341 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2342 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 2343 "its supported only with writeback mode\n"); 2344 clear_opt(sbi->s_mount_opt, NOBH); 2345 } 2346 } 2347 /* 2348 * The jbd2_journal_load will have done any necessary log recovery, 2349 * so we can safely mount the rest of the filesystem now. 2350 */ 2351 2352 root = ext4_iget(sb, EXT4_ROOT_INO); 2353 if (IS_ERR(root)) { 2354 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 2355 ret = PTR_ERR(root); 2356 goto failed_mount4; 2357 } 2358 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2359 iput(root); 2360 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 2361 goto failed_mount4; 2362 } 2363 sb->s_root = d_alloc_root(root); 2364 if (!sb->s_root) { 2365 printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); 2366 iput(root); 2367 ret = -ENOMEM; 2368 goto failed_mount4; 2369 } 2370 2371 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2372 2373 /* determine the minimum size of new large inodes, if present */ 2374 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2375 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2376 EXT4_GOOD_OLD_INODE_SIZE; 2377 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2378 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2379 if (sbi->s_want_extra_isize < 2380 le16_to_cpu(es->s_want_extra_isize)) 2381 sbi->s_want_extra_isize = 2382 le16_to_cpu(es->s_want_extra_isize); 2383 if (sbi->s_want_extra_isize < 2384 le16_to_cpu(es->s_min_extra_isize)) 2385 sbi->s_want_extra_isize = 2386 le16_to_cpu(es->s_min_extra_isize); 2387 } 2388 } 2389 /* Check if enough inode space is available */ 2390 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2391 sbi->s_inode_size) { 2392 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2393 EXT4_GOOD_OLD_INODE_SIZE; 2394 printk(KERN_INFO "EXT4-fs: required extra inode space not" 2395 "available.\n"); 2396 } 2397 2398 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2399 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2400 "requested data journaling mode\n"); 2401 clear_opt(sbi->s_mount_opt, DELALLOC); 2402 } else if (test_opt(sb, DELALLOC)) 2403 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2404 2405 ext4_ext_init(sb); 2406 err = ext4_mb_init(sb, needs_recovery); 2407 if (err) { 2408 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", 2409 err); 2410 goto failed_mount4; 2411 } 2412 2413 /* 2414 * akpm: core read_super() calls in here with the superblock locked. 2415 * That deadlocks, because orphan cleanup needs to lock the superblock 2416 * in numerous places. Here we just pop the lock - it's relatively 2417 * harmless, because we are now ready to accept write_super() requests, 2418 * and aviro says that's the only reason for hanging onto the 2419 * superblock lock. 2420 */ 2421 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 2422 ext4_orphan_cleanup(sb, es); 2423 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2424 if (needs_recovery) 2425 printk(KERN_INFO "EXT4-fs: recovery complete.\n"); 2426 ext4_mark_recovery_complete(sb, es); 2427 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2428 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2429 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2430 "writeback"); 2431 2432 lock_kernel(); 2433 return 0; 2434 2435 cantfind_ext4: 2436 if (!silent) 2437 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 2438 sb->s_id); 2439 goto failed_mount; 2440 2441 failed_mount4: 2442 jbd2_journal_destroy(sbi->s_journal); 2443 sbi->s_journal = NULL; 2444 failed_mount3: 2445 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2446 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2447 percpu_counter_destroy(&sbi->s_dirs_counter); 2448 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 2449 failed_mount2: 2450 for (i = 0; i < db_count; i++) 2451 brelse(sbi->s_group_desc[i]); 2452 kfree(sbi->s_group_desc); 2453 failed_mount: 2454 if (sbi->s_proc) { 2455 remove_proc_entry("inode_readahead_blks", sbi->s_proc); 2456 remove_proc_entry(sb->s_id, ext4_proc_root); 2457 } 2458 #ifdef CONFIG_QUOTA 2459 for (i = 0; i < MAXQUOTAS; i++) 2460 kfree(sbi->s_qf_names[i]); 2461 #endif 2462 ext4_blkdev_remove(sbi); 2463 brelse(bh); 2464 out_fail: 2465 sb->s_fs_info = NULL; 2466 kfree(sbi); 2467 lock_kernel(); 2468 return ret; 2469 } 2470 2471 /* 2472 * Setup any per-fs journal parameters now. We'll do this both on 2473 * initial mount, once the journal has been initialised but before we've 2474 * done any recovery; and again on any subsequent remount. 2475 */ 2476 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 2477 { 2478 struct ext4_sb_info *sbi = EXT4_SB(sb); 2479 2480 if (sbi->s_commit_interval) 2481 journal->j_commit_interval = sbi->s_commit_interval; 2482 /* We could also set up an ext4-specific default for the commit 2483 * interval here, but for now we'll just fall back to the jbd 2484 * default. */ 2485 2486 spin_lock(&journal->j_state_lock); 2487 if (test_opt(sb, BARRIER)) 2488 journal->j_flags |= JBD2_BARRIER; 2489 else 2490 journal->j_flags &= ~JBD2_BARRIER; 2491 if (test_opt(sb, DATA_ERR_ABORT)) 2492 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 2493 else 2494 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 2495 spin_unlock(&journal->j_state_lock); 2496 } 2497 2498 static journal_t *ext4_get_journal(struct super_block *sb, 2499 unsigned int journal_inum) 2500 { 2501 struct inode *journal_inode; 2502 journal_t *journal; 2503 2504 /* First, test for the existence of a valid inode on disk. Bad 2505 * things happen if we iget() an unused inode, as the subsequent 2506 * iput() will try to delete it. */ 2507 2508 journal_inode = ext4_iget(sb, journal_inum); 2509 if (IS_ERR(journal_inode)) { 2510 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 2511 return NULL; 2512 } 2513 if (!journal_inode->i_nlink) { 2514 make_bad_inode(journal_inode); 2515 iput(journal_inode); 2516 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 2517 return NULL; 2518 } 2519 2520 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 2521 journal_inode, journal_inode->i_size); 2522 if (!S_ISREG(journal_inode->i_mode)) { 2523 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2524 iput(journal_inode); 2525 return NULL; 2526 } 2527 2528 journal = jbd2_journal_init_inode(journal_inode); 2529 if (!journal) { 2530 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 2531 iput(journal_inode); 2532 return NULL; 2533 } 2534 journal->j_private = sb; 2535 ext4_init_journal_params(sb, journal); 2536 return journal; 2537 } 2538 2539 static journal_t *ext4_get_dev_journal(struct super_block *sb, 2540 dev_t j_dev) 2541 { 2542 struct buffer_head *bh; 2543 journal_t *journal; 2544 ext4_fsblk_t start; 2545 ext4_fsblk_t len; 2546 int hblock, blocksize; 2547 ext4_fsblk_t sb_block; 2548 unsigned long offset; 2549 struct ext4_super_block *es; 2550 struct block_device *bdev; 2551 2552 bdev = ext4_blkdev_get(j_dev); 2553 if (bdev == NULL) 2554 return NULL; 2555 2556 if (bd_claim(bdev, sb)) { 2557 printk(KERN_ERR 2558 "EXT4: failed to claim external journal device.\n"); 2559 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2560 return NULL; 2561 } 2562 2563 blocksize = sb->s_blocksize; 2564 hblock = bdev_hardsect_size(bdev); 2565 if (blocksize < hblock) { 2566 printk(KERN_ERR 2567 "EXT4-fs: blocksize too small for journal device.\n"); 2568 goto out_bdev; 2569 } 2570 2571 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 2572 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 2573 set_blocksize(bdev, blocksize); 2574 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2575 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 2576 "external journal\n"); 2577 goto out_bdev; 2578 } 2579 2580 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2581 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 2582 !(le32_to_cpu(es->s_feature_incompat) & 2583 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2584 printk(KERN_ERR "EXT4-fs: external journal has " 2585 "bad superblock\n"); 2586 brelse(bh); 2587 goto out_bdev; 2588 } 2589 2590 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2591 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 2592 brelse(bh); 2593 goto out_bdev; 2594 } 2595 2596 len = ext4_blocks_count(es); 2597 start = sb_block + 1; 2598 brelse(bh); /* we're done with the superblock */ 2599 2600 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 2601 start, len, blocksize); 2602 if (!journal) { 2603 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 2604 goto out_bdev; 2605 } 2606 journal->j_private = sb; 2607 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2608 wait_on_buffer(journal->j_sb_buffer); 2609 if (!buffer_uptodate(journal->j_sb_buffer)) { 2610 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 2611 goto out_journal; 2612 } 2613 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2614 printk(KERN_ERR "EXT4-fs: External journal has more than one " 2615 "user (unsupported) - %d\n", 2616 be32_to_cpu(journal->j_superblock->s_nr_users)); 2617 goto out_journal; 2618 } 2619 EXT4_SB(sb)->journal_bdev = bdev; 2620 ext4_init_journal_params(sb, journal); 2621 return journal; 2622 out_journal: 2623 jbd2_journal_destroy(journal); 2624 out_bdev: 2625 ext4_blkdev_put(bdev); 2626 return NULL; 2627 } 2628 2629 static int ext4_load_journal(struct super_block *sb, 2630 struct ext4_super_block *es, 2631 unsigned long journal_devnum) 2632 { 2633 journal_t *journal; 2634 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2635 dev_t journal_dev; 2636 int err = 0; 2637 int really_read_only; 2638 2639 if (journal_devnum && 2640 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2641 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2642 "numbers have changed\n"); 2643 journal_dev = new_decode_dev(journal_devnum); 2644 } else 2645 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2646 2647 really_read_only = bdev_read_only(sb->s_bdev); 2648 2649 /* 2650 * Are we loading a blank journal or performing recovery after a 2651 * crash? For recovery, we need to check in advance whether we 2652 * can get read-write access to the device. 2653 */ 2654 2655 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2656 if (sb->s_flags & MS_RDONLY) { 2657 printk(KERN_INFO "EXT4-fs: INFO: recovery " 2658 "required on readonly filesystem.\n"); 2659 if (really_read_only) { 2660 printk(KERN_ERR "EXT4-fs: write access " 2661 "unavailable, cannot proceed.\n"); 2662 return -EROFS; 2663 } 2664 printk(KERN_INFO "EXT4-fs: write access will " 2665 "be enabled during recovery.\n"); 2666 } 2667 } 2668 2669 if (journal_inum && journal_dev) { 2670 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 2671 "and inode journals!\n"); 2672 return -EINVAL; 2673 } 2674 2675 if (journal_inum) { 2676 if (!(journal = ext4_get_journal(sb, journal_inum))) 2677 return -EINVAL; 2678 } else { 2679 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 2680 return -EINVAL; 2681 } 2682 2683 if (journal->j_flags & JBD2_BARRIER) 2684 printk(KERN_INFO "EXT4-fs: barriers enabled\n"); 2685 else 2686 printk(KERN_INFO "EXT4-fs: barriers disabled\n"); 2687 2688 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2689 err = jbd2_journal_update_format(journal); 2690 if (err) { 2691 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 2692 jbd2_journal_destroy(journal); 2693 return err; 2694 } 2695 } 2696 2697 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 2698 err = jbd2_journal_wipe(journal, !really_read_only); 2699 if (!err) 2700 err = jbd2_journal_load(journal); 2701 2702 if (err) { 2703 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 2704 jbd2_journal_destroy(journal); 2705 return err; 2706 } 2707 2708 EXT4_SB(sb)->s_journal = journal; 2709 ext4_clear_journal_err(sb, es); 2710 2711 if (journal_devnum && 2712 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2713 es->s_journal_dev = cpu_to_le32(journal_devnum); 2714 sb->s_dirt = 1; 2715 2716 /* Make sure we flush the recovery flag to disk. */ 2717 ext4_commit_super(sb, es, 1); 2718 } 2719 2720 return 0; 2721 } 2722 2723 static int ext4_create_journal(struct super_block *sb, 2724 struct ext4_super_block *es, 2725 unsigned int journal_inum) 2726 { 2727 journal_t *journal; 2728 int err; 2729 2730 if (sb->s_flags & MS_RDONLY) { 2731 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " 2732 "create journal.\n"); 2733 return -EROFS; 2734 } 2735 2736 journal = ext4_get_journal(sb, journal_inum); 2737 if (!journal) 2738 return -EINVAL; 2739 2740 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", 2741 journal_inum); 2742 2743 err = jbd2_journal_create(journal); 2744 if (err) { 2745 printk(KERN_ERR "EXT4-fs: error creating journal.\n"); 2746 jbd2_journal_destroy(journal); 2747 return -EIO; 2748 } 2749 2750 EXT4_SB(sb)->s_journal = journal; 2751 2752 ext4_update_dynamic_rev(sb); 2753 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2754 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); 2755 2756 es->s_journal_inum = cpu_to_le32(journal_inum); 2757 sb->s_dirt = 1; 2758 2759 /* Make sure we flush the recovery flag to disk. */ 2760 ext4_commit_super(sb, es, 1); 2761 2762 return 0; 2763 } 2764 2765 static void ext4_commit_super(struct super_block *sb, 2766 struct ext4_super_block *es, int sync) 2767 { 2768 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2769 2770 if (!sbh) 2771 return; 2772 if (buffer_write_io_error(sbh)) { 2773 /* 2774 * Oh, dear. A previous attempt to write the 2775 * superblock failed. This could happen because the 2776 * USB device was yanked out. Or it could happen to 2777 * be a transient write error and maybe the block will 2778 * be remapped. Nothing we can do but to retry the 2779 * write and hope for the best. 2780 */ 2781 printk(KERN_ERR "ext4: previous I/O error to " 2782 "superblock detected for %s.\n", sb->s_id); 2783 clear_buffer_write_io_error(sbh); 2784 set_buffer_uptodate(sbh); 2785 } 2786 es->s_wtime = cpu_to_le32(get_seconds()); 2787 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2788 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2789 BUFFER_TRACE(sbh, "marking dirty"); 2790 mark_buffer_dirty(sbh); 2791 if (sync) { 2792 sync_dirty_buffer(sbh); 2793 if (buffer_write_io_error(sbh)) { 2794 printk(KERN_ERR "ext4: I/O error while writing " 2795 "superblock for %s.\n", sb->s_id); 2796 clear_buffer_write_io_error(sbh); 2797 set_buffer_uptodate(sbh); 2798 } 2799 } 2800 } 2801 2802 2803 /* 2804 * Have we just finished recovery? If so, and if we are mounting (or 2805 * remounting) the filesystem readonly, then we will end up with a 2806 * consistent fs on disk. Record that fact. 2807 */ 2808 static void ext4_mark_recovery_complete(struct super_block *sb, 2809 struct ext4_super_block *es) 2810 { 2811 journal_t *journal = EXT4_SB(sb)->s_journal; 2812 2813 jbd2_journal_lock_updates(journal); 2814 if (jbd2_journal_flush(journal) < 0) 2815 goto out; 2816 2817 lock_super(sb); 2818 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2819 sb->s_flags & MS_RDONLY) { 2820 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2821 sb->s_dirt = 0; 2822 ext4_commit_super(sb, es, 1); 2823 } 2824 unlock_super(sb); 2825 2826 out: 2827 jbd2_journal_unlock_updates(journal); 2828 } 2829 2830 /* 2831 * If we are mounting (or read-write remounting) a filesystem whose journal 2832 * has recorded an error from a previous lifetime, move that error to the 2833 * main filesystem now. 2834 */ 2835 static void ext4_clear_journal_err(struct super_block *sb, 2836 struct ext4_super_block *es) 2837 { 2838 journal_t *journal; 2839 int j_errno; 2840 const char *errstr; 2841 2842 journal = EXT4_SB(sb)->s_journal; 2843 2844 /* 2845 * Now check for any error status which may have been recorded in the 2846 * journal by a prior ext4_error() or ext4_abort() 2847 */ 2848 2849 j_errno = jbd2_journal_errno(journal); 2850 if (j_errno) { 2851 char nbuf[16]; 2852 2853 errstr = ext4_decode_error(sb, j_errno, nbuf); 2854 ext4_warning(sb, __func__, "Filesystem error recorded " 2855 "from previous mount: %s", errstr); 2856 ext4_warning(sb, __func__, "Marking fs in need of " 2857 "filesystem check."); 2858 2859 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2860 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2861 ext4_commit_super(sb, es, 1); 2862 2863 jbd2_journal_clear_err(journal); 2864 } 2865 } 2866 2867 /* 2868 * Force the running and committing transactions to commit, 2869 * and wait on the commit. 2870 */ 2871 int ext4_force_commit(struct super_block *sb) 2872 { 2873 journal_t *journal; 2874 int ret; 2875 2876 if (sb->s_flags & MS_RDONLY) 2877 return 0; 2878 2879 journal = EXT4_SB(sb)->s_journal; 2880 sb->s_dirt = 0; 2881 ret = ext4_journal_force_commit(journal); 2882 return ret; 2883 } 2884 2885 /* 2886 * Ext4 always journals updates to the superblock itself, so we don't 2887 * have to propagate any other updates to the superblock on disk at this 2888 * point. Just start an async writeback to get the buffers on their way 2889 * to the disk. 2890 * 2891 * This implicitly triggers the writebehind on sync(). 2892 */ 2893 2894 static void ext4_write_super(struct super_block *sb) 2895 { 2896 if (mutex_trylock(&sb->s_lock) != 0) 2897 BUG(); 2898 sb->s_dirt = 0; 2899 } 2900 2901 static int ext4_sync_fs(struct super_block *sb, int wait) 2902 { 2903 tid_t target; 2904 2905 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 2906 sb->s_dirt = 0; 2907 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2908 if (wait) 2909 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2910 } 2911 return 0; 2912 } 2913 2914 /* 2915 * LVM calls this function before a (read-only) snapshot is created. This 2916 * gives us a chance to flush the journal completely and mark the fs clean. 2917 */ 2918 static void ext4_write_super_lockfs(struct super_block *sb) 2919 { 2920 sb->s_dirt = 0; 2921 2922 if (!(sb->s_flags & MS_RDONLY)) { 2923 journal_t *journal = EXT4_SB(sb)->s_journal; 2924 2925 /* Now we set up the journal barrier. */ 2926 jbd2_journal_lock_updates(journal); 2927 2928 /* 2929 * We don't want to clear needs_recovery flag when we failed 2930 * to flush the journal. 2931 */ 2932 if (jbd2_journal_flush(journal) < 0) 2933 return; 2934 2935 /* Journal blocked and flushed, clear needs_recovery flag. */ 2936 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2937 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2938 } 2939 } 2940 2941 /* 2942 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2943 * flag here, even though the filesystem is not technically dirty yet. 2944 */ 2945 static void ext4_unlockfs(struct super_block *sb) 2946 { 2947 if (!(sb->s_flags & MS_RDONLY)) { 2948 lock_super(sb); 2949 /* Reser the needs_recovery flag before the fs is unlocked. */ 2950 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2951 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2952 unlock_super(sb); 2953 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 2954 } 2955 } 2956 2957 static int ext4_remount(struct super_block *sb, int *flags, char *data) 2958 { 2959 struct ext4_super_block *es; 2960 struct ext4_sb_info *sbi = EXT4_SB(sb); 2961 ext4_fsblk_t n_blocks_count = 0; 2962 unsigned long old_sb_flags; 2963 struct ext4_mount_options old_opts; 2964 ext4_group_t g; 2965 int err; 2966 #ifdef CONFIG_QUOTA 2967 int i; 2968 #endif 2969 2970 /* Store the original options */ 2971 old_sb_flags = sb->s_flags; 2972 old_opts.s_mount_opt = sbi->s_mount_opt; 2973 old_opts.s_resuid = sbi->s_resuid; 2974 old_opts.s_resgid = sbi->s_resgid; 2975 old_opts.s_commit_interval = sbi->s_commit_interval; 2976 #ifdef CONFIG_QUOTA 2977 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2978 for (i = 0; i < MAXQUOTAS; i++) 2979 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2980 #endif 2981 2982 /* 2983 * Allow the "check" option to be passed as a remount option. 2984 */ 2985 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2986 err = -EINVAL; 2987 goto restore_opts; 2988 } 2989 2990 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2991 ext4_abort(sb, __func__, "Abort forced by user"); 2992 2993 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2994 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2995 2996 es = sbi->s_es; 2997 2998 ext4_init_journal_params(sb, sbi->s_journal); 2999 3000 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3001 n_blocks_count > ext4_blocks_count(es)) { 3002 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3003 err = -EROFS; 3004 goto restore_opts; 3005 } 3006 3007 if (*flags & MS_RDONLY) { 3008 /* 3009 * First of all, the unconditional stuff we have to do 3010 * to disable replay of the journal when we next remount 3011 */ 3012 sb->s_flags |= MS_RDONLY; 3013 3014 /* 3015 * OK, test if we are remounting a valid rw partition 3016 * readonly, and if so set the rdonly flag and then 3017 * mark the partition as valid again. 3018 */ 3019 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3020 (sbi->s_mount_state & EXT4_VALID_FS)) 3021 es->s_state = cpu_to_le16(sbi->s_mount_state); 3022 3023 /* 3024 * We have to unlock super so that we can wait for 3025 * transactions. 3026 */ 3027 unlock_super(sb); 3028 ext4_mark_recovery_complete(sb, es); 3029 lock_super(sb); 3030 } else { 3031 __le32 ret; 3032 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3033 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 3034 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3035 "remount RDWR because of unsupported " 3036 "optional features (%x).\n", 3037 sb->s_id, le32_to_cpu(ret)); 3038 err = -EROFS; 3039 goto restore_opts; 3040 } 3041 3042 /* 3043 * Make sure the group descriptor checksums 3044 * are sane. If they aren't, refuse to 3045 * remount r/w. 3046 */ 3047 for (g = 0; g < sbi->s_groups_count; g++) { 3048 struct ext4_group_desc *gdp = 3049 ext4_get_group_desc(sb, g, NULL); 3050 3051 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3052 printk(KERN_ERR 3053 "EXT4-fs: ext4_remount: " 3054 "Checksum for group %lu failed (%u!=%u)\n", 3055 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3056 le16_to_cpu(gdp->bg_checksum)); 3057 err = -EINVAL; 3058 goto restore_opts; 3059 } 3060 } 3061 3062 /* 3063 * If we have an unprocessed orphan list hanging 3064 * around from a previously readonly bdev mount, 3065 * require a full umount/remount for now. 3066 */ 3067 if (es->s_last_orphan) { 3068 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 3069 "remount RDWR because of unprocessed " 3070 "orphan inode list. Please " 3071 "umount/remount instead.\n", 3072 sb->s_id); 3073 err = -EINVAL; 3074 goto restore_opts; 3075 } 3076 3077 /* 3078 * Mounting a RDONLY partition read-write, so reread 3079 * and store the current valid flag. (It may have 3080 * been changed by e2fsck since we originally mounted 3081 * the partition.) 3082 */ 3083 ext4_clear_journal_err(sb, es); 3084 sbi->s_mount_state = le16_to_cpu(es->s_state); 3085 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3086 goto restore_opts; 3087 if (!ext4_setup_super(sb, es, 0)) 3088 sb->s_flags &= ~MS_RDONLY; 3089 } 3090 } 3091 #ifdef CONFIG_QUOTA 3092 /* Release old quota file names */ 3093 for (i = 0; i < MAXQUOTAS; i++) 3094 if (old_opts.s_qf_names[i] && 3095 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3096 kfree(old_opts.s_qf_names[i]); 3097 #endif 3098 return 0; 3099 restore_opts: 3100 sb->s_flags = old_sb_flags; 3101 sbi->s_mount_opt = old_opts.s_mount_opt; 3102 sbi->s_resuid = old_opts.s_resuid; 3103 sbi->s_resgid = old_opts.s_resgid; 3104 sbi->s_commit_interval = old_opts.s_commit_interval; 3105 #ifdef CONFIG_QUOTA 3106 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3107 for (i = 0; i < MAXQUOTAS; i++) { 3108 if (sbi->s_qf_names[i] && 3109 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3110 kfree(sbi->s_qf_names[i]); 3111 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3112 } 3113 #endif 3114 return err; 3115 } 3116 3117 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3118 { 3119 struct super_block *sb = dentry->d_sb; 3120 struct ext4_sb_info *sbi = EXT4_SB(sb); 3121 struct ext4_super_block *es = sbi->s_es; 3122 u64 fsid; 3123 3124 if (test_opt(sb, MINIX_DF)) { 3125 sbi->s_overhead_last = 0; 3126 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3127 ext4_group_t ngroups = sbi->s_groups_count, i; 3128 ext4_fsblk_t overhead = 0; 3129 smp_rmb(); 3130 3131 /* 3132 * Compute the overhead (FS structures). This is constant 3133 * for a given filesystem unless the number of block groups 3134 * changes so we cache the previous value until it does. 3135 */ 3136 3137 /* 3138 * All of the blocks before first_data_block are 3139 * overhead 3140 */ 3141 overhead = le32_to_cpu(es->s_first_data_block); 3142 3143 /* 3144 * Add the overhead attributed to the superblock and 3145 * block group descriptors. If the sparse superblocks 3146 * feature is turned on, then not all groups have this. 3147 */ 3148 for (i = 0; i < ngroups; i++) { 3149 overhead += ext4_bg_has_super(sb, i) + 3150 ext4_bg_num_gdb(sb, i); 3151 cond_resched(); 3152 } 3153 3154 /* 3155 * Every block group has an inode bitmap, a block 3156 * bitmap, and an inode table. 3157 */ 3158 overhead += ngroups * (2 + sbi->s_itb_per_group); 3159 sbi->s_overhead_last = overhead; 3160 smp_wmb(); 3161 sbi->s_blocks_last = ext4_blocks_count(es); 3162 } 3163 3164 buf->f_type = EXT4_SUPER_MAGIC; 3165 buf->f_bsize = sb->s_blocksize; 3166 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3167 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3168 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3169 ext4_free_blocks_count_set(es, buf->f_bfree); 3170 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3171 if (buf->f_bfree < ext4_r_blocks_count(es)) 3172 buf->f_bavail = 0; 3173 buf->f_files = le32_to_cpu(es->s_inodes_count); 3174 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3175 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); 3176 buf->f_namelen = EXT4_NAME_LEN; 3177 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3178 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3179 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3180 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3181 return 0; 3182 } 3183 3184 /* Helper function for writing quotas on sync - we need to start transaction before quota file 3185 * is locked for write. Otherwise the are possible deadlocks: 3186 * Process 1 Process 2 3187 * ext4_create() quota_sync() 3188 * jbd2_journal_start() write_dquot() 3189 * DQUOT_INIT() down(dqio_mutex) 3190 * down(dqio_mutex) jbd2_journal_start() 3191 * 3192 */ 3193 3194 #ifdef CONFIG_QUOTA 3195 3196 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3197 { 3198 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3199 } 3200 3201 static int ext4_dquot_initialize(struct inode *inode, int type) 3202 { 3203 handle_t *handle; 3204 int ret, err; 3205 3206 /* We may create quota structure so we need to reserve enough blocks */ 3207 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); 3208 if (IS_ERR(handle)) 3209 return PTR_ERR(handle); 3210 ret = dquot_initialize(inode, type); 3211 err = ext4_journal_stop(handle); 3212 if (!ret) 3213 ret = err; 3214 return ret; 3215 } 3216 3217 static int ext4_dquot_drop(struct inode *inode) 3218 { 3219 handle_t *handle; 3220 int ret, err; 3221 3222 /* We may delete quota structure so we need to reserve enough blocks */ 3223 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3224 if (IS_ERR(handle)) { 3225 /* 3226 * We call dquot_drop() anyway to at least release references 3227 * to quota structures so that umount does not hang. 3228 */ 3229 dquot_drop(inode); 3230 return PTR_ERR(handle); 3231 } 3232 ret = dquot_drop(inode); 3233 err = ext4_journal_stop(handle); 3234 if (!ret) 3235 ret = err; 3236 return ret; 3237 } 3238 3239 static int ext4_write_dquot(struct dquot *dquot) 3240 { 3241 int ret, err; 3242 handle_t *handle; 3243 struct inode *inode; 3244 3245 inode = dquot_to_inode(dquot); 3246 handle = ext4_journal_start(inode, 3247 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3248 if (IS_ERR(handle)) 3249 return PTR_ERR(handle); 3250 ret = dquot_commit(dquot); 3251 err = ext4_journal_stop(handle); 3252 if (!ret) 3253 ret = err; 3254 return ret; 3255 } 3256 3257 static int ext4_acquire_dquot(struct dquot *dquot) 3258 { 3259 int ret, err; 3260 handle_t *handle; 3261 3262 handle = ext4_journal_start(dquot_to_inode(dquot), 3263 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3264 if (IS_ERR(handle)) 3265 return PTR_ERR(handle); 3266 ret = dquot_acquire(dquot); 3267 err = ext4_journal_stop(handle); 3268 if (!ret) 3269 ret = err; 3270 return ret; 3271 } 3272 3273 static int ext4_release_dquot(struct dquot *dquot) 3274 { 3275 int ret, err; 3276 handle_t *handle; 3277 3278 handle = ext4_journal_start(dquot_to_inode(dquot), 3279 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3280 if (IS_ERR(handle)) { 3281 /* Release dquot anyway to avoid endless cycle in dqput() */ 3282 dquot_release(dquot); 3283 return PTR_ERR(handle); 3284 } 3285 ret = dquot_release(dquot); 3286 err = ext4_journal_stop(handle); 3287 if (!ret) 3288 ret = err; 3289 return ret; 3290 } 3291 3292 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3293 { 3294 /* Are we journaling quotas? */ 3295 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3296 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3297 dquot_mark_dquot_dirty(dquot); 3298 return ext4_write_dquot(dquot); 3299 } else { 3300 return dquot_mark_dquot_dirty(dquot); 3301 } 3302 } 3303 3304 static int ext4_write_info(struct super_block *sb, int type) 3305 { 3306 int ret, err; 3307 handle_t *handle; 3308 3309 /* Data block + inode block */ 3310 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3311 if (IS_ERR(handle)) 3312 return PTR_ERR(handle); 3313 ret = dquot_commit_info(sb, type); 3314 err = ext4_journal_stop(handle); 3315 if (!ret) 3316 ret = err; 3317 return ret; 3318 } 3319 3320 /* 3321 * Turn on quotas during mount time - we need to find 3322 * the quota file and such... 3323 */ 3324 static int ext4_quota_on_mount(struct super_block *sb, int type) 3325 { 3326 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3327 EXT4_SB(sb)->s_jquota_fmt, type); 3328 } 3329 3330 /* 3331 * Standard function to be called on quota_on 3332 */ 3333 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3334 char *name, int remount) 3335 { 3336 int err; 3337 struct path path; 3338 3339 if (!test_opt(sb, QUOTA)) 3340 return -EINVAL; 3341 /* When remounting, no checks are needed and in fact, name is NULL */ 3342 if (remount) 3343 return vfs_quota_on(sb, type, format_id, name, remount); 3344 3345 err = kern_path(name, LOOKUP_FOLLOW, &path); 3346 if (err) 3347 return err; 3348 3349 /* Quotafile not on the same filesystem? */ 3350 if (path.mnt->mnt_sb != sb) { 3351 path_put(&path); 3352 return -EXDEV; 3353 } 3354 /* Journaling quota? */ 3355 if (EXT4_SB(sb)->s_qf_names[type]) { 3356 /* Quotafile not in fs root? */ 3357 if (path.dentry->d_parent != sb->s_root) 3358 printk(KERN_WARNING 3359 "EXT4-fs: Quota file not on filesystem root. " 3360 "Journaled quota will not work.\n"); 3361 } 3362 3363 /* 3364 * When we journal data on quota file, we have to flush journal to see 3365 * all updates to the file when we bypass pagecache... 3366 */ 3367 if (ext4_should_journal_data(path.dentry->d_inode)) { 3368 /* 3369 * We don't need to lock updates but journal_flush() could 3370 * otherwise be livelocked... 3371 */ 3372 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3373 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3374 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3375 if (err) { 3376 path_put(&path); 3377 return err; 3378 } 3379 } 3380 3381 err = vfs_quota_on_path(sb, type, format_id, &path); 3382 path_put(&path); 3383 return err; 3384 } 3385 3386 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3387 * acquiring the locks... As quota files are never truncated and quota code 3388 * itself serializes the operations (and noone else should touch the files) 3389 * we don't have to be afraid of races */ 3390 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3391 size_t len, loff_t off) 3392 { 3393 struct inode *inode = sb_dqopt(sb)->files[type]; 3394 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3395 int err = 0; 3396 int offset = off & (sb->s_blocksize - 1); 3397 int tocopy; 3398 size_t toread; 3399 struct buffer_head *bh; 3400 loff_t i_size = i_size_read(inode); 3401 3402 if (off > i_size) 3403 return 0; 3404 if (off+len > i_size) 3405 len = i_size-off; 3406 toread = len; 3407 while (toread > 0) { 3408 tocopy = sb->s_blocksize - offset < toread ? 3409 sb->s_blocksize - offset : toread; 3410 bh = ext4_bread(NULL, inode, blk, 0, &err); 3411 if (err) 3412 return err; 3413 if (!bh) /* A hole? */ 3414 memset(data, 0, tocopy); 3415 else 3416 memcpy(data, bh->b_data+offset, tocopy); 3417 brelse(bh); 3418 offset = 0; 3419 toread -= tocopy; 3420 data += tocopy; 3421 blk++; 3422 } 3423 return len; 3424 } 3425 3426 /* Write to quotafile (we know the transaction is already started and has 3427 * enough credits) */ 3428 static ssize_t ext4_quota_write(struct super_block *sb, int type, 3429 const char *data, size_t len, loff_t off) 3430 { 3431 struct inode *inode = sb_dqopt(sb)->files[type]; 3432 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3433 int err = 0; 3434 int offset = off & (sb->s_blocksize - 1); 3435 int tocopy; 3436 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 3437 size_t towrite = len; 3438 struct buffer_head *bh; 3439 handle_t *handle = journal_current_handle(); 3440 3441 if (!handle) { 3442 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" 3443 " cancelled because transaction is not started.\n", 3444 (unsigned long long)off, (unsigned long long)len); 3445 return -EIO; 3446 } 3447 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 3448 while (towrite > 0) { 3449 tocopy = sb->s_blocksize - offset < towrite ? 3450 sb->s_blocksize - offset : towrite; 3451 bh = ext4_bread(handle, inode, blk, 1, &err); 3452 if (!bh) 3453 goto out; 3454 if (journal_quota) { 3455 err = ext4_journal_get_write_access(handle, bh); 3456 if (err) { 3457 brelse(bh); 3458 goto out; 3459 } 3460 } 3461 lock_buffer(bh); 3462 memcpy(bh->b_data+offset, data, tocopy); 3463 flush_dcache_page(bh->b_page); 3464 unlock_buffer(bh); 3465 if (journal_quota) 3466 err = ext4_journal_dirty_metadata(handle, bh); 3467 else { 3468 /* Always do at least ordered writes for quotas */ 3469 err = ext4_jbd2_file_inode(handle, inode); 3470 mark_buffer_dirty(bh); 3471 } 3472 brelse(bh); 3473 if (err) 3474 goto out; 3475 offset = 0; 3476 towrite -= tocopy; 3477 data += tocopy; 3478 blk++; 3479 } 3480 out: 3481 if (len == towrite) { 3482 mutex_unlock(&inode->i_mutex); 3483 return err; 3484 } 3485 if (inode->i_size < off+len-towrite) { 3486 i_size_write(inode, off+len-towrite); 3487 EXT4_I(inode)->i_disksize = inode->i_size; 3488 } 3489 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3490 ext4_mark_inode_dirty(handle, inode); 3491 mutex_unlock(&inode->i_mutex); 3492 return len - towrite; 3493 } 3494 3495 #endif 3496 3497 static int ext4_get_sb(struct file_system_type *fs_type, 3498 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3499 { 3500 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3501 } 3502 3503 #ifdef CONFIG_PROC_FS 3504 static int ext4_ui_proc_show(struct seq_file *m, void *v) 3505 { 3506 unsigned int *p = m->private; 3507 3508 seq_printf(m, "%u\n", *p); 3509 return 0; 3510 } 3511 3512 static int ext4_ui_proc_open(struct inode *inode, struct file *file) 3513 { 3514 return single_open(file, ext4_ui_proc_show, PDE(inode)->data); 3515 } 3516 3517 static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, 3518 size_t cnt, loff_t *ppos) 3519 { 3520 unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; 3521 char str[32]; 3522 unsigned long value; 3523 3524 if (cnt >= sizeof(str)) 3525 return -EINVAL; 3526 if (copy_from_user(str, buf, cnt)) 3527 return -EFAULT; 3528 value = simple_strtol(str, NULL, 0); 3529 if (value < 0) 3530 return -ERANGE; 3531 *p = value; 3532 return cnt; 3533 } 3534 3535 const struct file_operations ext4_ui_proc_fops = { 3536 .owner = THIS_MODULE, 3537 .open = ext4_ui_proc_open, 3538 .read = seq_read, 3539 .llseek = seq_lseek, 3540 .release = single_release, 3541 .write = ext4_ui_proc_write, 3542 }; 3543 #endif 3544 3545 static struct file_system_type ext4_fs_type = { 3546 .owner = THIS_MODULE, 3547 .name = "ext4", 3548 .get_sb = ext4_get_sb, 3549 .kill_sb = kill_block_super, 3550 .fs_flags = FS_REQUIRES_DEV, 3551 }; 3552 3553 #ifdef CONFIG_EXT4DEV_COMPAT 3554 static int ext4dev_get_sb(struct file_system_type *fs_type, 3555 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 3556 { 3557 printk(KERN_WARNING "EXT4-fs: Update your userspace programs " 3558 "to mount using ext4\n"); 3559 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " 3560 "will go away by 2.6.31\n"); 3561 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3562 } 3563 3564 static struct file_system_type ext4dev_fs_type = { 3565 .owner = THIS_MODULE, 3566 .name = "ext4dev", 3567 .get_sb = ext4dev_get_sb, 3568 .kill_sb = kill_block_super, 3569 .fs_flags = FS_REQUIRES_DEV, 3570 }; 3571 MODULE_ALIAS("ext4dev"); 3572 #endif 3573 3574 static int __init init_ext4_fs(void) 3575 { 3576 int err; 3577 3578 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3579 err = init_ext4_mballoc(); 3580 if (err) 3581 return err; 3582 3583 err = init_ext4_xattr(); 3584 if (err) 3585 goto out2; 3586 err = init_inodecache(); 3587 if (err) 3588 goto out1; 3589 err = register_filesystem(&ext4_fs_type); 3590 if (err) 3591 goto out; 3592 #ifdef CONFIG_EXT4DEV_COMPAT 3593 err = register_filesystem(&ext4dev_fs_type); 3594 if (err) { 3595 unregister_filesystem(&ext4_fs_type); 3596 goto out; 3597 } 3598 #endif 3599 return 0; 3600 out: 3601 destroy_inodecache(); 3602 out1: 3603 exit_ext4_xattr(); 3604 out2: 3605 exit_ext4_mballoc(); 3606 return err; 3607 } 3608 3609 static void __exit exit_ext4_fs(void) 3610 { 3611 unregister_filesystem(&ext4_fs_type); 3612 #ifdef CONFIG_EXT4DEV_COMPAT 3613 unregister_filesystem(&ext4dev_fs_type); 3614 #endif 3615 destroy_inodecache(); 3616 exit_ext4_xattr(); 3617 exit_ext4_mballoc(); 3618 remove_proc_entry("fs/ext4", NULL); 3619 } 3620 3621 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3622 MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); 3623 MODULE_LICENSE("GPL"); 3624 module_init(init_ext4_fs) 3625 module_exit(exit_ext4_fs) 3626