1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/jbd2.h> 24 #include <linux/ext4_fs.h> 25 #include <linux/ext4_jbd2.h> 26 #include <linux/slab.h> 27 #include <linux/init.h> 28 #include <linux/blkdev.h> 29 #include <linux/parser.h> 30 #include <linux/smp_lock.h> 31 #include <linux/buffer_head.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 39 #include <asm/uaccess.h> 40 41 #include "xattr.h" 42 #include "acl.h" 43 #include "namei.h" 44 45 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 46 unsigned long journal_devnum); 47 static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 48 unsigned int); 49 static void ext4_commit_super (struct super_block * sb, 50 struct ext4_super_block * es, 51 int sync); 52 static void ext4_mark_recovery_complete(struct super_block * sb, 53 struct ext4_super_block * es); 54 static void ext4_clear_journal_err(struct super_block * sb, 55 struct ext4_super_block * es); 56 static int ext4_sync_fs(struct super_block *sb, int wait); 57 static const char *ext4_decode_error(struct super_block * sb, int errno, 58 char nbuf[16]); 59 static int ext4_remount (struct super_block * sb, int * flags, char * data); 60 static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 61 static void ext4_unlockfs(struct super_block *sb); 62 static void ext4_write_super (struct super_block * sb); 63 static void ext4_write_super_lockfs(struct super_block *sb); 64 65 66 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 67 struct ext4_group_desc *bg) 68 { 69 return le32_to_cpu(bg->bg_block_bitmap) | 70 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 71 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 72 } 73 74 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 75 struct ext4_group_desc *bg) 76 { 77 return le32_to_cpu(bg->bg_inode_bitmap) | 78 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 79 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 80 } 81 82 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 83 struct ext4_group_desc *bg) 84 { 85 return le32_to_cpu(bg->bg_inode_table) | 86 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 87 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 88 } 89 90 void ext4_block_bitmap_set(struct super_block *sb, 91 struct ext4_group_desc *bg, ext4_fsblk_t blk) 92 { 93 bg->bg_block_bitmap = cpu_to_le32((u32)blk); 94 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 95 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 96 } 97 98 void ext4_inode_bitmap_set(struct super_block *sb, 99 struct ext4_group_desc *bg, ext4_fsblk_t blk) 100 { 101 bg->bg_inode_bitmap = cpu_to_le32((u32)blk); 102 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 103 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 104 } 105 106 void ext4_inode_table_set(struct super_block *sb, 107 struct ext4_group_desc *bg, ext4_fsblk_t blk) 108 { 109 bg->bg_inode_table = cpu_to_le32((u32)blk); 110 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 111 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 112 } 113 114 /* 115 * Wrappers for jbd2_journal_start/end. 116 * 117 * The only special thing we need to do here is to make sure that all 118 * journal_end calls result in the superblock being marked dirty, so 119 * that sync() will call the filesystem's write_super callback if 120 * appropriate. 121 */ 122 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 123 { 124 journal_t *journal; 125 126 if (sb->s_flags & MS_RDONLY) 127 return ERR_PTR(-EROFS); 128 129 /* Special case here: if the journal has aborted behind our 130 * backs (eg. EIO in the commit thread), then we still need to 131 * take the FS itself readonly cleanly. */ 132 journal = EXT4_SB(sb)->s_journal; 133 if (is_journal_aborted(journal)) { 134 ext4_abort(sb, __FUNCTION__, 135 "Detected aborted journal"); 136 return ERR_PTR(-EROFS); 137 } 138 139 return jbd2_journal_start(journal, nblocks); 140 } 141 142 /* 143 * The only special thing we need to do here is to make sure that all 144 * jbd2_journal_stop calls result in the superblock being marked dirty, so 145 * that sync() will call the filesystem's write_super callback if 146 * appropriate. 147 */ 148 int __ext4_journal_stop(const char *where, handle_t *handle) 149 { 150 struct super_block *sb; 151 int err; 152 int rc; 153 154 sb = handle->h_transaction->t_journal->j_private; 155 err = handle->h_err; 156 rc = jbd2_journal_stop(handle); 157 158 if (!err) 159 err = rc; 160 if (err) 161 __ext4_std_error(sb, where, err); 162 return err; 163 } 164 165 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 166 struct buffer_head *bh, handle_t *handle, int err) 167 { 168 char nbuf[16]; 169 const char *errstr = ext4_decode_error(NULL, err, nbuf); 170 171 if (bh) 172 BUFFER_TRACE(bh, "abort"); 173 174 if (!handle->h_err) 175 handle->h_err = err; 176 177 if (is_handle_aborted(handle)) 178 return; 179 180 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 181 caller, errstr, err_fn); 182 183 jbd2_journal_abort_handle(handle); 184 } 185 186 /* Deal with the reporting of failure conditions on a filesystem such as 187 * inconsistencies detected or read IO failures. 188 * 189 * On ext2, we can store the error state of the filesystem in the 190 * superblock. That is not possible on ext4, because we may have other 191 * write ordering constraints on the superblock which prevent us from 192 * writing it out straight away; and given that the journal is about to 193 * be aborted, we can't rely on the current, or future, transactions to 194 * write out the superblock safely. 195 * 196 * We'll just use the jbd2_journal_abort() error code to record an error in 197 * the journal instead. On recovery, the journal will compain about 198 * that error until we've noted it down and cleared it. 199 */ 200 201 static void ext4_handle_error(struct super_block *sb) 202 { 203 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 204 205 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 206 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 207 208 if (sb->s_flags & MS_RDONLY) 209 return; 210 211 if (!test_opt (sb, ERRORS_CONT)) { 212 journal_t *journal = EXT4_SB(sb)->s_journal; 213 214 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 215 if (journal) 216 jbd2_journal_abort(journal, -EIO); 217 } 218 if (test_opt (sb, ERRORS_RO)) { 219 printk (KERN_CRIT "Remounting filesystem read-only\n"); 220 sb->s_flags |= MS_RDONLY; 221 } 222 ext4_commit_super(sb, es, 1); 223 if (test_opt(sb, ERRORS_PANIC)) 224 panic("EXT4-fs (device %s): panic forced after error\n", 225 sb->s_id); 226 } 227 228 void ext4_error (struct super_block * sb, const char * function, 229 const char * fmt, ...) 230 { 231 va_list args; 232 233 va_start(args, fmt); 234 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 235 vprintk(fmt, args); 236 printk("\n"); 237 va_end(args); 238 239 ext4_handle_error(sb); 240 } 241 242 static const char *ext4_decode_error(struct super_block * sb, int errno, 243 char nbuf[16]) 244 { 245 char *errstr = NULL; 246 247 switch (errno) { 248 case -EIO: 249 errstr = "IO failure"; 250 break; 251 case -ENOMEM: 252 errstr = "Out of memory"; 253 break; 254 case -EROFS: 255 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) 256 errstr = "Journal has aborted"; 257 else 258 errstr = "Readonly filesystem"; 259 break; 260 default: 261 /* If the caller passed in an extra buffer for unknown 262 * errors, textualise them now. Else we just return 263 * NULL. */ 264 if (nbuf) { 265 /* Check for truncated error codes... */ 266 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 267 errstr = nbuf; 268 } 269 break; 270 } 271 272 return errstr; 273 } 274 275 /* __ext4_std_error decodes expected errors from journaling functions 276 * automatically and invokes the appropriate error response. */ 277 278 void __ext4_std_error (struct super_block * sb, const char * function, 279 int errno) 280 { 281 char nbuf[16]; 282 const char *errstr; 283 284 /* Special case: if the error is EROFS, and we're not already 285 * inside a transaction, then there's really no point in logging 286 * an error. */ 287 if (errno == -EROFS && journal_current_handle() == NULL && 288 (sb->s_flags & MS_RDONLY)) 289 return; 290 291 errstr = ext4_decode_error(sb, errno, nbuf); 292 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 sb->s_id, function, errstr); 294 295 ext4_handle_error(sb); 296 } 297 298 /* 299 * ext4_abort is a much stronger failure handler than ext4_error. The 300 * abort function may be used to deal with unrecoverable failures such 301 * as journal IO errors or ENOMEM at a critical moment in log management. 302 * 303 * We unconditionally force the filesystem into an ABORT|READONLY state, 304 * unless the error response on the fs has been set to panic in which 305 * case we take the easy way out and panic immediately. 306 */ 307 308 void ext4_abort (struct super_block * sb, const char * function, 309 const char * fmt, ...) 310 { 311 va_list args; 312 313 printk (KERN_CRIT "ext4_abort called.\n"); 314 315 va_start(args, fmt); 316 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 vprintk(fmt, args); 318 printk("\n"); 319 va_end(args); 320 321 if (test_opt(sb, ERRORS_PANIC)) 322 panic("EXT4-fs panic from previous error\n"); 323 324 if (sb->s_flags & MS_RDONLY) 325 return; 326 327 printk(KERN_CRIT "Remounting filesystem read-only\n"); 328 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 329 sb->s_flags |= MS_RDONLY; 330 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 331 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 } 333 334 void ext4_warning (struct super_block * sb, const char * function, 335 const char * fmt, ...) 336 { 337 va_list args; 338 339 va_start(args, fmt); 340 printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", 341 sb->s_id, function); 342 vprintk(fmt, args); 343 printk("\n"); 344 va_end(args); 345 } 346 347 void ext4_update_dynamic_rev(struct super_block *sb) 348 { 349 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 350 351 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 352 return; 353 354 ext4_warning(sb, __FUNCTION__, 355 "updating to rev %d because of new feature flag, " 356 "running e2fsck is recommended", 357 EXT4_DYNAMIC_REV); 358 359 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 360 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 361 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 362 /* leave es->s_feature_*compat flags alone */ 363 /* es->s_uuid will be set by e2fsck if empty */ 364 365 /* 366 * The rest of the superblock fields should be zero, and if not it 367 * means they are likely already in use, so leave them alone. We 368 * can leave it up to e2fsck to clean up any inconsistencies there. 369 */ 370 } 371 372 /* 373 * Open the external journal device 374 */ 375 static struct block_device *ext4_blkdev_get(dev_t dev) 376 { 377 struct block_device *bdev; 378 char b[BDEVNAME_SIZE]; 379 380 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); 381 if (IS_ERR(bdev)) 382 goto fail; 383 return bdev; 384 385 fail: 386 printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", 387 __bdevname(dev, b), PTR_ERR(bdev)); 388 return NULL; 389 } 390 391 /* 392 * Release the journal device 393 */ 394 static int ext4_blkdev_put(struct block_device *bdev) 395 { 396 bd_release(bdev); 397 return blkdev_put(bdev); 398 } 399 400 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 401 { 402 struct block_device *bdev; 403 int ret = -ENODEV; 404 405 bdev = sbi->journal_bdev; 406 if (bdev) { 407 ret = ext4_blkdev_put(bdev); 408 sbi->journal_bdev = NULL; 409 } 410 return ret; 411 } 412 413 static inline struct inode *orphan_list_entry(struct list_head *l) 414 { 415 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 416 } 417 418 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 419 { 420 struct list_head *l; 421 422 printk(KERN_ERR "sb orphan head is %d\n", 423 le32_to_cpu(sbi->s_es->s_last_orphan)); 424 425 printk(KERN_ERR "sb_info orphan list:\n"); 426 list_for_each(l, &sbi->s_orphan) { 427 struct inode *inode = orphan_list_entry(l); 428 printk(KERN_ERR " " 429 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 430 inode->i_sb->s_id, inode->i_ino, inode, 431 inode->i_mode, inode->i_nlink, 432 NEXT_ORPHAN(inode)); 433 } 434 } 435 436 static void ext4_put_super (struct super_block * sb) 437 { 438 struct ext4_sb_info *sbi = EXT4_SB(sb); 439 struct ext4_super_block *es = sbi->s_es; 440 int i; 441 442 ext4_ext_release(sb); 443 ext4_xattr_put_super(sb); 444 jbd2_journal_destroy(sbi->s_journal); 445 if (!(sb->s_flags & MS_RDONLY)) { 446 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 447 es->s_state = cpu_to_le16(sbi->s_mount_state); 448 BUFFER_TRACE(sbi->s_sbh, "marking dirty"); 449 mark_buffer_dirty(sbi->s_sbh); 450 ext4_commit_super(sb, es, 1); 451 } 452 453 for (i = 0; i < sbi->s_gdb_count; i++) 454 brelse(sbi->s_group_desc[i]); 455 kfree(sbi->s_group_desc); 456 percpu_counter_destroy(&sbi->s_freeblocks_counter); 457 percpu_counter_destroy(&sbi->s_freeinodes_counter); 458 percpu_counter_destroy(&sbi->s_dirs_counter); 459 brelse(sbi->s_sbh); 460 #ifdef CONFIG_QUOTA 461 for (i = 0; i < MAXQUOTAS; i++) 462 kfree(sbi->s_qf_names[i]); 463 #endif 464 465 /* Debugging code just in case the in-memory inode orphan list 466 * isn't empty. The on-disk one can be non-empty if we've 467 * detected an error and taken the fs readonly, but the 468 * in-memory list had better be clean by this point. */ 469 if (!list_empty(&sbi->s_orphan)) 470 dump_orphan_list(sb, sbi); 471 J_ASSERT(list_empty(&sbi->s_orphan)); 472 473 invalidate_bdev(sb->s_bdev); 474 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 475 /* 476 * Invalidate the journal device's buffers. We don't want them 477 * floating about in memory - the physical journal device may 478 * hotswapped, and it breaks the `ro-after' testing code. 479 */ 480 sync_blockdev(sbi->journal_bdev); 481 invalidate_bdev(sbi->journal_bdev); 482 ext4_blkdev_remove(sbi); 483 } 484 sb->s_fs_info = NULL; 485 kfree(sbi); 486 return; 487 } 488 489 static struct kmem_cache *ext4_inode_cachep; 490 491 /* 492 * Called inside transaction, so use GFP_NOFS 493 */ 494 static struct inode *ext4_alloc_inode(struct super_block *sb) 495 { 496 struct ext4_inode_info *ei; 497 498 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 499 if (!ei) 500 return NULL; 501 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 502 ei->i_acl = EXT4_ACL_NOT_CACHED; 503 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 504 #endif 505 ei->i_block_alloc_info = NULL; 506 ei->vfs_inode.i_version = 1; 507 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 508 return &ei->vfs_inode; 509 } 510 511 static void ext4_destroy_inode(struct inode *inode) 512 { 513 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 514 } 515 516 static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 517 { 518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 519 520 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 521 SLAB_CTOR_CONSTRUCTOR) { 522 INIT_LIST_HEAD(&ei->i_orphan); 523 #ifdef CONFIG_EXT4DEV_FS_XATTR 524 init_rwsem(&ei->xattr_sem); 525 #endif 526 mutex_init(&ei->truncate_mutex); 527 inode_init_once(&ei->vfs_inode); 528 } 529 } 530 531 static int init_inodecache(void) 532 { 533 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 534 sizeof(struct ext4_inode_info), 535 0, (SLAB_RECLAIM_ACCOUNT| 536 SLAB_MEM_SPREAD), 537 init_once, NULL); 538 if (ext4_inode_cachep == NULL) 539 return -ENOMEM; 540 return 0; 541 } 542 543 static void destroy_inodecache(void) 544 { 545 kmem_cache_destroy(ext4_inode_cachep); 546 } 547 548 static void ext4_clear_inode(struct inode *inode) 549 { 550 struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; 551 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 552 if (EXT4_I(inode)->i_acl && 553 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 554 posix_acl_release(EXT4_I(inode)->i_acl); 555 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; 556 } 557 if (EXT4_I(inode)->i_default_acl && 558 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { 559 posix_acl_release(EXT4_I(inode)->i_default_acl); 560 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 561 } 562 #endif 563 ext4_discard_reservation(inode); 564 EXT4_I(inode)->i_block_alloc_info = NULL; 565 if (unlikely(rsv)) 566 kfree(rsv); 567 } 568 569 static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 570 { 571 #if defined(CONFIG_QUOTA) 572 struct ext4_sb_info *sbi = EXT4_SB(sb); 573 574 if (sbi->s_jquota_fmt) 575 seq_printf(seq, ",jqfmt=%s", 576 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 577 578 if (sbi->s_qf_names[USRQUOTA]) 579 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 580 581 if (sbi->s_qf_names[GRPQUOTA]) 582 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 583 584 if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) 585 seq_puts(seq, ",usrquota"); 586 587 if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) 588 seq_puts(seq, ",grpquota"); 589 #endif 590 } 591 592 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 593 { 594 struct super_block *sb = vfs->mnt_sb; 595 596 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 597 seq_puts(seq, ",data=journal"); 598 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 599 seq_puts(seq, ",data=ordered"); 600 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 601 seq_puts(seq, ",data=writeback"); 602 603 ext4_show_quota_options(seq, sb); 604 605 return 0; 606 } 607 608 609 static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) 610 { 611 __u32 *objp = vobjp; 612 unsigned long ino = objp[0]; 613 __u32 generation = objp[1]; 614 struct inode *inode; 615 struct dentry *result; 616 617 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 618 return ERR_PTR(-ESTALE); 619 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 620 return ERR_PTR(-ESTALE); 621 622 /* iget isn't really right if the inode is currently unallocated!! 623 * 624 * ext4_read_inode will return a bad_inode if the inode had been 625 * deleted, so we should be safe. 626 * 627 * Currently we don't know the generation for parent directory, so 628 * a generation of 0 means "accept any" 629 */ 630 inode = iget(sb, ino); 631 if (inode == NULL) 632 return ERR_PTR(-ENOMEM); 633 if (is_bad_inode(inode) || 634 (generation && inode->i_generation != generation)) { 635 iput(inode); 636 return ERR_PTR(-ESTALE); 637 } 638 /* now to find a dentry. 639 * If possible, get a well-connected one 640 */ 641 result = d_alloc_anon(inode); 642 if (!result) { 643 iput(inode); 644 return ERR_PTR(-ENOMEM); 645 } 646 return result; 647 } 648 649 #ifdef CONFIG_QUOTA 650 #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 651 #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 652 653 static int ext4_dquot_initialize(struct inode *inode, int type); 654 static int ext4_dquot_drop(struct inode *inode); 655 static int ext4_write_dquot(struct dquot *dquot); 656 static int ext4_acquire_dquot(struct dquot *dquot); 657 static int ext4_release_dquot(struct dquot *dquot); 658 static int ext4_mark_dquot_dirty(struct dquot *dquot); 659 static int ext4_write_info(struct super_block *sb, int type); 660 static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); 661 static int ext4_quota_on_mount(struct super_block *sb, int type); 662 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 663 size_t len, loff_t off); 664 static ssize_t ext4_quota_write(struct super_block *sb, int type, 665 const char *data, size_t len, loff_t off); 666 667 static struct dquot_operations ext4_quota_operations = { 668 .initialize = ext4_dquot_initialize, 669 .drop = ext4_dquot_drop, 670 .alloc_space = dquot_alloc_space, 671 .alloc_inode = dquot_alloc_inode, 672 .free_space = dquot_free_space, 673 .free_inode = dquot_free_inode, 674 .transfer = dquot_transfer, 675 .write_dquot = ext4_write_dquot, 676 .acquire_dquot = ext4_acquire_dquot, 677 .release_dquot = ext4_release_dquot, 678 .mark_dirty = ext4_mark_dquot_dirty, 679 .write_info = ext4_write_info 680 }; 681 682 static struct quotactl_ops ext4_qctl_operations = { 683 .quota_on = ext4_quota_on, 684 .quota_off = vfs_quota_off, 685 .quota_sync = vfs_quota_sync, 686 .get_info = vfs_get_dqinfo, 687 .set_info = vfs_set_dqinfo, 688 .get_dqblk = vfs_get_dqblk, 689 .set_dqblk = vfs_set_dqblk 690 }; 691 #endif 692 693 static const struct super_operations ext4_sops = { 694 .alloc_inode = ext4_alloc_inode, 695 .destroy_inode = ext4_destroy_inode, 696 .read_inode = ext4_read_inode, 697 .write_inode = ext4_write_inode, 698 .dirty_inode = ext4_dirty_inode, 699 .delete_inode = ext4_delete_inode, 700 .put_super = ext4_put_super, 701 .write_super = ext4_write_super, 702 .sync_fs = ext4_sync_fs, 703 .write_super_lockfs = ext4_write_super_lockfs, 704 .unlockfs = ext4_unlockfs, 705 .statfs = ext4_statfs, 706 .remount_fs = ext4_remount, 707 .clear_inode = ext4_clear_inode, 708 .show_options = ext4_show_options, 709 #ifdef CONFIG_QUOTA 710 .quota_read = ext4_quota_read, 711 .quota_write = ext4_quota_write, 712 #endif 713 }; 714 715 static struct export_operations ext4_export_ops = { 716 .get_parent = ext4_get_parent, 717 .get_dentry = ext4_get_dentry, 718 }; 719 720 enum { 721 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 722 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 723 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 724 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 725 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 726 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 727 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 728 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 729 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 730 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 731 Opt_grpquota, Opt_extents, 732 }; 733 734 static match_table_t tokens = { 735 {Opt_bsd_df, "bsddf"}, 736 {Opt_minix_df, "minixdf"}, 737 {Opt_grpid, "grpid"}, 738 {Opt_grpid, "bsdgroups"}, 739 {Opt_nogrpid, "nogrpid"}, 740 {Opt_nogrpid, "sysvgroups"}, 741 {Opt_resgid, "resgid=%u"}, 742 {Opt_resuid, "resuid=%u"}, 743 {Opt_sb, "sb=%u"}, 744 {Opt_err_cont, "errors=continue"}, 745 {Opt_err_panic, "errors=panic"}, 746 {Opt_err_ro, "errors=remount-ro"}, 747 {Opt_nouid32, "nouid32"}, 748 {Opt_nocheck, "nocheck"}, 749 {Opt_nocheck, "check=none"}, 750 {Opt_debug, "debug"}, 751 {Opt_oldalloc, "oldalloc"}, 752 {Opt_orlov, "orlov"}, 753 {Opt_user_xattr, "user_xattr"}, 754 {Opt_nouser_xattr, "nouser_xattr"}, 755 {Opt_acl, "acl"}, 756 {Opt_noacl, "noacl"}, 757 {Opt_reservation, "reservation"}, 758 {Opt_noreservation, "noreservation"}, 759 {Opt_noload, "noload"}, 760 {Opt_nobh, "nobh"}, 761 {Opt_bh, "bh"}, 762 {Opt_commit, "commit=%u"}, 763 {Opt_journal_update, "journal=update"}, 764 {Opt_journal_inum, "journal=%u"}, 765 {Opt_journal_dev, "journal_dev=%u"}, 766 {Opt_abort, "abort"}, 767 {Opt_data_journal, "data=journal"}, 768 {Opt_data_ordered, "data=ordered"}, 769 {Opt_data_writeback, "data=writeback"}, 770 {Opt_offusrjquota, "usrjquota="}, 771 {Opt_usrjquota, "usrjquota=%s"}, 772 {Opt_offgrpjquota, "grpjquota="}, 773 {Opt_grpjquota, "grpjquota=%s"}, 774 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 775 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 776 {Opt_grpquota, "grpquota"}, 777 {Opt_noquota, "noquota"}, 778 {Opt_quota, "quota"}, 779 {Opt_usrquota, "usrquota"}, 780 {Opt_barrier, "barrier=%u"}, 781 {Opt_extents, "extents"}, 782 {Opt_err, NULL}, 783 {Opt_resize, "resize"}, 784 }; 785 786 static ext4_fsblk_t get_sb_block(void **data) 787 { 788 ext4_fsblk_t sb_block; 789 char *options = (char *) *data; 790 791 if (!options || strncmp(options, "sb=", 3) != 0) 792 return 1; /* Default location */ 793 options += 3; 794 /*todo: use simple_strtoll with >32bit ext4 */ 795 sb_block = simple_strtoul(options, &options, 0); 796 if (*options && *options != ',') { 797 printk("EXT4-fs: Invalid sb specification: %s\n", 798 (char *) *data); 799 return 1; 800 } 801 if (*options == ',') 802 options++; 803 *data = (void *) options; 804 return sb_block; 805 } 806 807 static int parse_options (char *options, struct super_block *sb, 808 unsigned int *inum, unsigned long *journal_devnum, 809 ext4_fsblk_t *n_blocks_count, int is_remount) 810 { 811 struct ext4_sb_info *sbi = EXT4_SB(sb); 812 char * p; 813 substring_t args[MAX_OPT_ARGS]; 814 int data_opt = 0; 815 int option; 816 #ifdef CONFIG_QUOTA 817 int qtype; 818 char *qname; 819 #endif 820 821 if (!options) 822 return 1; 823 824 while ((p = strsep (&options, ",")) != NULL) { 825 int token; 826 if (!*p) 827 continue; 828 829 token = match_token(p, tokens, args); 830 switch (token) { 831 case Opt_bsd_df: 832 clear_opt (sbi->s_mount_opt, MINIX_DF); 833 break; 834 case Opt_minix_df: 835 set_opt (sbi->s_mount_opt, MINIX_DF); 836 break; 837 case Opt_grpid: 838 set_opt (sbi->s_mount_opt, GRPID); 839 break; 840 case Opt_nogrpid: 841 clear_opt (sbi->s_mount_opt, GRPID); 842 break; 843 case Opt_resuid: 844 if (match_int(&args[0], &option)) 845 return 0; 846 sbi->s_resuid = option; 847 break; 848 case Opt_resgid: 849 if (match_int(&args[0], &option)) 850 return 0; 851 sbi->s_resgid = option; 852 break; 853 case Opt_sb: 854 /* handled by get_sb_block() instead of here */ 855 /* *sb_block = match_int(&args[0]); */ 856 break; 857 case Opt_err_panic: 858 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 859 clear_opt (sbi->s_mount_opt, ERRORS_RO); 860 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 861 break; 862 case Opt_err_ro: 863 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 864 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 865 set_opt (sbi->s_mount_opt, ERRORS_RO); 866 break; 867 case Opt_err_cont: 868 clear_opt (sbi->s_mount_opt, ERRORS_RO); 869 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 870 set_opt (sbi->s_mount_opt, ERRORS_CONT); 871 break; 872 case Opt_nouid32: 873 set_opt (sbi->s_mount_opt, NO_UID32); 874 break; 875 case Opt_nocheck: 876 clear_opt (sbi->s_mount_opt, CHECK); 877 break; 878 case Opt_debug: 879 set_opt (sbi->s_mount_opt, DEBUG); 880 break; 881 case Opt_oldalloc: 882 set_opt (sbi->s_mount_opt, OLDALLOC); 883 break; 884 case Opt_orlov: 885 clear_opt (sbi->s_mount_opt, OLDALLOC); 886 break; 887 #ifdef CONFIG_EXT4DEV_FS_XATTR 888 case Opt_user_xattr: 889 set_opt (sbi->s_mount_opt, XATTR_USER); 890 break; 891 case Opt_nouser_xattr: 892 clear_opt (sbi->s_mount_opt, XATTR_USER); 893 break; 894 #else 895 case Opt_user_xattr: 896 case Opt_nouser_xattr: 897 printk("EXT4 (no)user_xattr options not supported\n"); 898 break; 899 #endif 900 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 901 case Opt_acl: 902 set_opt(sbi->s_mount_opt, POSIX_ACL); 903 break; 904 case Opt_noacl: 905 clear_opt(sbi->s_mount_opt, POSIX_ACL); 906 break; 907 #else 908 case Opt_acl: 909 case Opt_noacl: 910 printk("EXT4 (no)acl options not supported\n"); 911 break; 912 #endif 913 case Opt_reservation: 914 set_opt(sbi->s_mount_opt, RESERVATION); 915 break; 916 case Opt_noreservation: 917 clear_opt(sbi->s_mount_opt, RESERVATION); 918 break; 919 case Opt_journal_update: 920 /* @@@ FIXME */ 921 /* Eventually we will want to be able to create 922 a journal file here. For now, only allow the 923 user to specify an existing inode to be the 924 journal file. */ 925 if (is_remount) { 926 printk(KERN_ERR "EXT4-fs: cannot specify " 927 "journal on remount\n"); 928 return 0; 929 } 930 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 931 break; 932 case Opt_journal_inum: 933 if (is_remount) { 934 printk(KERN_ERR "EXT4-fs: cannot specify " 935 "journal on remount\n"); 936 return 0; 937 } 938 if (match_int(&args[0], &option)) 939 return 0; 940 *inum = option; 941 break; 942 case Opt_journal_dev: 943 if (is_remount) { 944 printk(KERN_ERR "EXT4-fs: cannot specify " 945 "journal on remount\n"); 946 return 0; 947 } 948 if (match_int(&args[0], &option)) 949 return 0; 950 *journal_devnum = option; 951 break; 952 case Opt_noload: 953 set_opt (sbi->s_mount_opt, NOLOAD); 954 break; 955 case Opt_commit: 956 if (match_int(&args[0], &option)) 957 return 0; 958 if (option < 0) 959 return 0; 960 if (option == 0) 961 option = JBD_DEFAULT_MAX_COMMIT_AGE; 962 sbi->s_commit_interval = HZ * option; 963 break; 964 case Opt_data_journal: 965 data_opt = EXT4_MOUNT_JOURNAL_DATA; 966 goto datacheck; 967 case Opt_data_ordered: 968 data_opt = EXT4_MOUNT_ORDERED_DATA; 969 goto datacheck; 970 case Opt_data_writeback: 971 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 972 datacheck: 973 if (is_remount) { 974 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) 975 != data_opt) { 976 printk(KERN_ERR 977 "EXT4-fs: cannot change data " 978 "mode on remount\n"); 979 return 0; 980 } 981 } else { 982 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; 983 sbi->s_mount_opt |= data_opt; 984 } 985 break; 986 #ifdef CONFIG_QUOTA 987 case Opt_usrjquota: 988 qtype = USRQUOTA; 989 goto set_qf_name; 990 case Opt_grpjquota: 991 qtype = GRPQUOTA; 992 set_qf_name: 993 if (sb_any_quota_enabled(sb)) { 994 printk(KERN_ERR 995 "EXT4-fs: Cannot change journalled " 996 "quota options when quota turned on.\n"); 997 return 0; 998 } 999 qname = match_strdup(&args[0]); 1000 if (!qname) { 1001 printk(KERN_ERR 1002 "EXT4-fs: not enough memory for " 1003 "storing quotafile name.\n"); 1004 return 0; 1005 } 1006 if (sbi->s_qf_names[qtype] && 1007 strcmp(sbi->s_qf_names[qtype], qname)) { 1008 printk(KERN_ERR 1009 "EXT4-fs: %s quota file already " 1010 "specified.\n", QTYPE2NAME(qtype)); 1011 kfree(qname); 1012 return 0; 1013 } 1014 sbi->s_qf_names[qtype] = qname; 1015 if (strchr(sbi->s_qf_names[qtype], '/')) { 1016 printk(KERN_ERR 1017 "EXT4-fs: quotafile must be on " 1018 "filesystem root.\n"); 1019 kfree(sbi->s_qf_names[qtype]); 1020 sbi->s_qf_names[qtype] = NULL; 1021 return 0; 1022 } 1023 set_opt(sbi->s_mount_opt, QUOTA); 1024 break; 1025 case Opt_offusrjquota: 1026 qtype = USRQUOTA; 1027 goto clear_qf_name; 1028 case Opt_offgrpjquota: 1029 qtype = GRPQUOTA; 1030 clear_qf_name: 1031 if (sb_any_quota_enabled(sb)) { 1032 printk(KERN_ERR "EXT4-fs: Cannot change " 1033 "journalled quota options when " 1034 "quota turned on.\n"); 1035 return 0; 1036 } 1037 /* 1038 * The space will be released later when all options 1039 * are confirmed to be correct 1040 */ 1041 sbi->s_qf_names[qtype] = NULL; 1042 break; 1043 case Opt_jqfmt_vfsold: 1044 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1045 break; 1046 case Opt_jqfmt_vfsv0: 1047 sbi->s_jquota_fmt = QFMT_VFS_V0; 1048 break; 1049 case Opt_quota: 1050 case Opt_usrquota: 1051 set_opt(sbi->s_mount_opt, QUOTA); 1052 set_opt(sbi->s_mount_opt, USRQUOTA); 1053 break; 1054 case Opt_grpquota: 1055 set_opt(sbi->s_mount_opt, QUOTA); 1056 set_opt(sbi->s_mount_opt, GRPQUOTA); 1057 break; 1058 case Opt_noquota: 1059 if (sb_any_quota_enabled(sb)) { 1060 printk(KERN_ERR "EXT4-fs: Cannot change quota " 1061 "options when quota turned on.\n"); 1062 return 0; 1063 } 1064 clear_opt(sbi->s_mount_opt, QUOTA); 1065 clear_opt(sbi->s_mount_opt, USRQUOTA); 1066 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1067 break; 1068 #else 1069 case Opt_quota: 1070 case Opt_usrquota: 1071 case Opt_grpquota: 1072 case Opt_usrjquota: 1073 case Opt_grpjquota: 1074 case Opt_offusrjquota: 1075 case Opt_offgrpjquota: 1076 case Opt_jqfmt_vfsold: 1077 case Opt_jqfmt_vfsv0: 1078 printk(KERN_ERR 1079 "EXT4-fs: journalled quota options not " 1080 "supported.\n"); 1081 break; 1082 case Opt_noquota: 1083 break; 1084 #endif 1085 case Opt_abort: 1086 set_opt(sbi->s_mount_opt, ABORT); 1087 break; 1088 case Opt_barrier: 1089 if (match_int(&args[0], &option)) 1090 return 0; 1091 if (option) 1092 set_opt(sbi->s_mount_opt, BARRIER); 1093 else 1094 clear_opt(sbi->s_mount_opt, BARRIER); 1095 break; 1096 case Opt_ignore: 1097 break; 1098 case Opt_resize: 1099 if (!is_remount) { 1100 printk("EXT4-fs: resize option only available " 1101 "for remount\n"); 1102 return 0; 1103 } 1104 if (match_int(&args[0], &option) != 0) 1105 return 0; 1106 *n_blocks_count = option; 1107 break; 1108 case Opt_nobh: 1109 set_opt(sbi->s_mount_opt, NOBH); 1110 break; 1111 case Opt_bh: 1112 clear_opt(sbi->s_mount_opt, NOBH); 1113 break; 1114 case Opt_extents: 1115 set_opt (sbi->s_mount_opt, EXTENTS); 1116 break; 1117 default: 1118 printk (KERN_ERR 1119 "EXT4-fs: Unrecognized mount option \"%s\" " 1120 "or missing value\n", p); 1121 return 0; 1122 } 1123 } 1124 #ifdef CONFIG_QUOTA 1125 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1126 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && 1127 sbi->s_qf_names[USRQUOTA]) 1128 clear_opt(sbi->s_mount_opt, USRQUOTA); 1129 1130 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && 1131 sbi->s_qf_names[GRPQUOTA]) 1132 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1133 1134 if ((sbi->s_qf_names[USRQUOTA] && 1135 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || 1136 (sbi->s_qf_names[GRPQUOTA] && 1137 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { 1138 printk(KERN_ERR "EXT4-fs: old and new quota " 1139 "format mixing.\n"); 1140 return 0; 1141 } 1142 1143 if (!sbi->s_jquota_fmt) { 1144 printk(KERN_ERR "EXT4-fs: journalled quota format " 1145 "not specified.\n"); 1146 return 0; 1147 } 1148 } else { 1149 if (sbi->s_jquota_fmt) { 1150 printk(KERN_ERR "EXT4-fs: journalled quota format " 1151 "specified with no journalling " 1152 "enabled.\n"); 1153 return 0; 1154 } 1155 } 1156 #endif 1157 return 1; 1158 } 1159 1160 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1161 int read_only) 1162 { 1163 struct ext4_sb_info *sbi = EXT4_SB(sb); 1164 int res = 0; 1165 1166 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1167 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1168 "forcing read-only mode\n"); 1169 res = MS_RDONLY; 1170 } 1171 if (read_only) 1172 return res; 1173 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1174 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1175 "running e2fsck is recommended\n"); 1176 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1177 printk (KERN_WARNING 1178 "EXT4-fs warning: mounting fs with errors, " 1179 "running e2fsck is recommended\n"); 1180 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1181 le16_to_cpu(es->s_mnt_count) >= 1182 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1183 printk (KERN_WARNING 1184 "EXT4-fs warning: maximal mount count reached, " 1185 "running e2fsck is recommended\n"); 1186 else if (le32_to_cpu(es->s_checkinterval) && 1187 (le32_to_cpu(es->s_lastcheck) + 1188 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1189 printk (KERN_WARNING 1190 "EXT4-fs warning: checktime reached, " 1191 "running e2fsck is recommended\n"); 1192 #if 0 1193 /* @@@ We _will_ want to clear the valid bit if we find 1194 * inconsistencies, to force a fsck at reboot. But for 1195 * a plain journaled filesystem we can keep it set as 1196 * valid forever! :) 1197 */ 1198 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1199 #endif 1200 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1201 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1202 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1203 es->s_mtime = cpu_to_le32(get_seconds()); 1204 ext4_update_dynamic_rev(sb); 1205 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1206 1207 ext4_commit_super(sb, es, 1); 1208 if (test_opt(sb, DEBUG)) 1209 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " 1210 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1211 sb->s_blocksize, 1212 sbi->s_groups_count, 1213 EXT4_BLOCKS_PER_GROUP(sb), 1214 EXT4_INODES_PER_GROUP(sb), 1215 sbi->s_mount_opt); 1216 1217 printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); 1218 if (EXT4_SB(sb)->s_journal->j_inode == NULL) { 1219 char b[BDEVNAME_SIZE]; 1220 1221 printk("external journal on %s\n", 1222 bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); 1223 } else { 1224 printk("internal journal\n"); 1225 } 1226 return res; 1227 } 1228 1229 /* Called at mount-time, super-block is locked */ 1230 static int ext4_check_descriptors (struct super_block * sb) 1231 { 1232 struct ext4_sb_info *sbi = EXT4_SB(sb); 1233 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1234 ext4_fsblk_t last_block; 1235 ext4_fsblk_t block_bitmap; 1236 ext4_fsblk_t inode_bitmap; 1237 ext4_fsblk_t inode_table; 1238 struct ext4_group_desc * gdp = NULL; 1239 int desc_block = 0; 1240 int i; 1241 1242 ext4_debug ("Checking group descriptors"); 1243 1244 for (i = 0; i < sbi->s_groups_count; i++) 1245 { 1246 if (i == sbi->s_groups_count - 1) 1247 last_block = ext4_blocks_count(sbi->s_es) - 1; 1248 else 1249 last_block = first_block + 1250 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1251 1252 if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) 1253 gdp = (struct ext4_group_desc *) 1254 sbi->s_group_desc[desc_block++]->b_data; 1255 block_bitmap = ext4_block_bitmap(sb, gdp); 1256 if (block_bitmap < first_block || block_bitmap > last_block) 1257 { 1258 ext4_error (sb, "ext4_check_descriptors", 1259 "Block bitmap for group %d" 1260 " not in group (block %llu)!", 1261 i, block_bitmap); 1262 return 0; 1263 } 1264 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1265 if (inode_bitmap < first_block || inode_bitmap > last_block) 1266 { 1267 ext4_error (sb, "ext4_check_descriptors", 1268 "Inode bitmap for group %d" 1269 " not in group (block %llu)!", 1270 i, inode_bitmap); 1271 return 0; 1272 } 1273 inode_table = ext4_inode_table(sb, gdp); 1274 if (inode_table < first_block || 1275 inode_table + sbi->s_itb_per_group > last_block) 1276 { 1277 ext4_error (sb, "ext4_check_descriptors", 1278 "Inode table for group %d" 1279 " not in group (block %llu)!", 1280 i, inode_table); 1281 return 0; 1282 } 1283 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1284 gdp = (struct ext4_group_desc *) 1285 ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); 1286 } 1287 1288 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1289 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1290 return 1; 1291 } 1292 1293 1294 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1295 * the superblock) which were deleted from all directories, but held open by 1296 * a process at the time of a crash. We walk the list and try to delete these 1297 * inodes at recovery time (only with a read-write filesystem). 1298 * 1299 * In order to keep the orphan inode chain consistent during traversal (in 1300 * case of crash during recovery), we link each inode into the superblock 1301 * orphan list_head and handle it the same way as an inode deletion during 1302 * normal operation (which journals the operations for us). 1303 * 1304 * We only do an iget() and an iput() on each inode, which is very safe if we 1305 * accidentally point at an in-use or already deleted inode. The worst that 1306 * can happen in this case is that we get a "bit already cleared" message from 1307 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1308 * e2fsck was run on this filesystem, and it must have already done the orphan 1309 * inode cleanup for us, so we can safely abort without any further action. 1310 */ 1311 static void ext4_orphan_cleanup (struct super_block * sb, 1312 struct ext4_super_block * es) 1313 { 1314 unsigned int s_flags = sb->s_flags; 1315 int nr_orphans = 0, nr_truncates = 0; 1316 #ifdef CONFIG_QUOTA 1317 int i; 1318 #endif 1319 if (!es->s_last_orphan) { 1320 jbd_debug(4, "no orphan inodes to clean up\n"); 1321 return; 1322 } 1323 1324 if (bdev_read_only(sb->s_bdev)) { 1325 printk(KERN_ERR "EXT4-fs: write access " 1326 "unavailable, skipping orphan cleanup.\n"); 1327 return; 1328 } 1329 1330 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1331 if (es->s_last_orphan) 1332 jbd_debug(1, "Errors on filesystem, " 1333 "clearing orphan list.\n"); 1334 es->s_last_orphan = 0; 1335 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1336 return; 1337 } 1338 1339 if (s_flags & MS_RDONLY) { 1340 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", 1341 sb->s_id); 1342 sb->s_flags &= ~MS_RDONLY; 1343 } 1344 #ifdef CONFIG_QUOTA 1345 /* Needed for iput() to work correctly and not trash data */ 1346 sb->s_flags |= MS_ACTIVE; 1347 /* Turn on quotas so that they are updated correctly */ 1348 for (i = 0; i < MAXQUOTAS; i++) { 1349 if (EXT4_SB(sb)->s_qf_names[i]) { 1350 int ret = ext4_quota_on_mount(sb, i); 1351 if (ret < 0) 1352 printk(KERN_ERR 1353 "EXT4-fs: Cannot turn on journalled " 1354 "quota: error %d\n", ret); 1355 } 1356 } 1357 #endif 1358 1359 while (es->s_last_orphan) { 1360 struct inode *inode; 1361 1362 if (!(inode = 1363 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1364 es->s_last_orphan = 0; 1365 break; 1366 } 1367 1368 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 1369 DQUOT_INIT(inode); 1370 if (inode->i_nlink) { 1371 printk(KERN_DEBUG 1372 "%s: truncating inode %lu to %Ld bytes\n", 1373 __FUNCTION__, inode->i_ino, inode->i_size); 1374 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1375 inode->i_ino, inode->i_size); 1376 ext4_truncate(inode); 1377 nr_truncates++; 1378 } else { 1379 printk(KERN_DEBUG 1380 "%s: deleting unreferenced inode %lu\n", 1381 __FUNCTION__, inode->i_ino); 1382 jbd_debug(2, "deleting unreferenced inode %lu\n", 1383 inode->i_ino); 1384 nr_orphans++; 1385 } 1386 iput(inode); /* The delete magic happens here! */ 1387 } 1388 1389 #define PLURAL(x) (x), ((x)==1) ? "" : "s" 1390 1391 if (nr_orphans) 1392 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1393 sb->s_id, PLURAL(nr_orphans)); 1394 if (nr_truncates) 1395 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", 1396 sb->s_id, PLURAL(nr_truncates)); 1397 #ifdef CONFIG_QUOTA 1398 /* Turn quotas off */ 1399 for (i = 0; i < MAXQUOTAS; i++) { 1400 if (sb_dqopt(sb)->files[i]) 1401 vfs_quota_off(sb, i); 1402 } 1403 #endif 1404 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1405 } 1406 1407 #define log2(n) ffz(~(n)) 1408 1409 /* 1410 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1411 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1412 * We need to be 1 filesystem block less than the 2^32 sector limit. 1413 */ 1414 static loff_t ext4_max_size(int bits) 1415 { 1416 loff_t res = EXT4_NDIR_BLOCKS; 1417 /* This constant is calculated to be the largest file size for a 1418 * dense, 4k-blocksize file such that the total number of 1419 * sectors in the file, including data and all indirect blocks, 1420 * does not exceed 2^32. */ 1421 const loff_t upper_limit = 0x1ff7fffd000LL; 1422 1423 res += 1LL << (bits-2); 1424 res += 1LL << (2*(bits-2)); 1425 res += 1LL << (3*(bits-2)); 1426 res <<= bits; 1427 if (res > upper_limit) 1428 res = upper_limit; 1429 return res; 1430 } 1431 1432 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 1433 ext4_fsblk_t logical_sb_block, int nr) 1434 { 1435 struct ext4_sb_info *sbi = EXT4_SB(sb); 1436 unsigned long bg, first_meta_bg; 1437 int has_super = 0; 1438 1439 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1440 1441 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 1442 nr < first_meta_bg) 1443 return logical_sb_block + nr + 1; 1444 bg = sbi->s_desc_per_block * nr; 1445 if (ext4_bg_has_super(sb, bg)) 1446 has_super = 1; 1447 return (has_super + ext4_group_first_block_no(sb, bg)); 1448 } 1449 1450 1451 static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1452 { 1453 struct buffer_head * bh; 1454 struct ext4_super_block *es = NULL; 1455 struct ext4_sb_info *sbi; 1456 ext4_fsblk_t block; 1457 ext4_fsblk_t sb_block = get_sb_block(&data); 1458 ext4_fsblk_t logical_sb_block; 1459 unsigned long offset = 0; 1460 unsigned int journal_inum = 0; 1461 unsigned long journal_devnum = 0; 1462 unsigned long def_mount_opts; 1463 struct inode *root; 1464 int blocksize; 1465 int hblock; 1466 int db_count; 1467 int i; 1468 int needs_recovery; 1469 __le32 features; 1470 __u64 blocks_count; 1471 1472 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1473 if (!sbi) 1474 return -ENOMEM; 1475 sb->s_fs_info = sbi; 1476 sbi->s_mount_opt = 0; 1477 sbi->s_resuid = EXT4_DEF_RESUID; 1478 sbi->s_resgid = EXT4_DEF_RESGID; 1479 1480 unlock_kernel(); 1481 1482 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1483 if (!blocksize) { 1484 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1485 goto out_fail; 1486 } 1487 1488 /* 1489 * The ext4 superblock will not be buffer aligned for other than 1kB 1490 * block sizes. We need to calculate the offset from buffer start. 1491 */ 1492 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 1493 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1494 offset = do_div(logical_sb_block, blocksize); 1495 } else { 1496 logical_sb_block = sb_block; 1497 } 1498 1499 if (!(bh = sb_bread(sb, logical_sb_block))) { 1500 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1501 goto out_fail; 1502 } 1503 /* 1504 * Note: s_es must be initialized as soon as possible because 1505 * some ext4 macro-instructions depend on its value 1506 */ 1507 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 1508 sbi->s_es = es; 1509 sb->s_magic = le16_to_cpu(es->s_magic); 1510 if (sb->s_magic != EXT4_SUPER_MAGIC) 1511 goto cantfind_ext4; 1512 1513 /* Set defaults before we parse the mount options */ 1514 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1515 if (def_mount_opts & EXT4_DEFM_DEBUG) 1516 set_opt(sbi->s_mount_opt, DEBUG); 1517 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 1518 set_opt(sbi->s_mount_opt, GRPID); 1519 if (def_mount_opts & EXT4_DEFM_UID16) 1520 set_opt(sbi->s_mount_opt, NO_UID32); 1521 #ifdef CONFIG_EXT4DEV_FS_XATTR 1522 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1523 set_opt(sbi->s_mount_opt, XATTR_USER); 1524 #endif 1525 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 1526 if (def_mount_opts & EXT4_DEFM_ACL) 1527 set_opt(sbi->s_mount_opt, POSIX_ACL); 1528 #endif 1529 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 1530 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; 1531 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 1532 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; 1533 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 1534 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; 1535 1536 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 1537 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1538 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) 1539 set_opt(sbi->s_mount_opt, ERRORS_RO); 1540 else 1541 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1542 1543 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1544 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1545 1546 set_opt(sbi->s_mount_opt, RESERVATION); 1547 1548 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1549 NULL, 0)) 1550 goto failed_mount; 1551 1552 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1553 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1554 1555 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 1556 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 1557 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1558 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1559 printk(KERN_WARNING 1560 "EXT4-fs warning: feature flags set on rev 0 fs, " 1561 "running e2fsck is recommended\n"); 1562 /* 1563 * Check feature flags regardless of the revision level, since we 1564 * previously didn't change the revision level when setting the flags, 1565 * so there is a chance incompat flags are set on a rev 0 filesystem. 1566 */ 1567 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); 1568 if (features) { 1569 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " 1570 "unsupported optional features (%x).\n", 1571 sb->s_id, le32_to_cpu(features)); 1572 goto failed_mount; 1573 } 1574 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); 1575 if (!(sb->s_flags & MS_RDONLY) && features) { 1576 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " 1577 "unsupported optional features (%x).\n", 1578 sb->s_id, le32_to_cpu(features)); 1579 goto failed_mount; 1580 } 1581 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1582 1583 if (blocksize < EXT4_MIN_BLOCK_SIZE || 1584 blocksize > EXT4_MAX_BLOCK_SIZE) { 1585 printk(KERN_ERR 1586 "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", 1587 blocksize, sb->s_id); 1588 goto failed_mount; 1589 } 1590 1591 hblock = bdev_hardsect_size(sb->s_bdev); 1592 if (sb->s_blocksize != blocksize) { 1593 /* 1594 * Make sure the blocksize for the filesystem is larger 1595 * than the hardware sectorsize for the machine. 1596 */ 1597 if (blocksize < hblock) { 1598 printk(KERN_ERR "EXT4-fs: blocksize %d too small for " 1599 "device blocksize %d.\n", blocksize, hblock); 1600 goto failed_mount; 1601 } 1602 1603 brelse (bh); 1604 sb_set_blocksize(sb, blocksize); 1605 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 1606 offset = do_div(logical_sb_block, blocksize); 1607 bh = sb_bread(sb, logical_sb_block); 1608 if (!bh) { 1609 printk(KERN_ERR 1610 "EXT4-fs: Can't read superblock on 2nd try.\n"); 1611 goto failed_mount; 1612 } 1613 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 1614 sbi->s_es = es; 1615 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 1616 printk (KERN_ERR 1617 "EXT4-fs: Magic mismatch, very weird !\n"); 1618 goto failed_mount; 1619 } 1620 } 1621 1622 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); 1623 1624 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 1625 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 1626 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 1627 } else { 1628 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 1629 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 1630 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 1631 (sbi->s_inode_size & (sbi->s_inode_size - 1)) || 1632 (sbi->s_inode_size > blocksize)) { 1633 printk (KERN_ERR 1634 "EXT4-fs: unsupported inode size: %d\n", 1635 sbi->s_inode_size); 1636 goto failed_mount; 1637 } 1638 } 1639 sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << 1640 le32_to_cpu(es->s_log_frag_size); 1641 if (blocksize != sbi->s_frag_size) { 1642 printk(KERN_ERR 1643 "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", 1644 sbi->s_frag_size, blocksize); 1645 goto failed_mount; 1646 } 1647 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 1648 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 1649 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 1650 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 1651 sbi->s_desc_size & (sbi->s_desc_size - 1)) { 1652 printk(KERN_ERR 1653 "EXT4-fs: unsupported descriptor size %lu\n", 1654 sbi->s_desc_size); 1655 goto failed_mount; 1656 } 1657 } else 1658 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 1659 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 1660 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); 1661 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 1662 if (EXT4_INODE_SIZE(sb) == 0) 1663 goto cantfind_ext4; 1664 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 1665 if (sbi->s_inodes_per_block == 0) 1666 goto cantfind_ext4; 1667 sbi->s_itb_per_group = sbi->s_inodes_per_group / 1668 sbi->s_inodes_per_block; 1669 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 1670 sbi->s_sbh = bh; 1671 sbi->s_mount_state = le16_to_cpu(es->s_state); 1672 sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); 1673 sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb)); 1674 for (i=0; i < 4; i++) 1675 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 1676 sbi->s_def_hash_version = es->s_def_hash_version; 1677 1678 if (sbi->s_blocks_per_group > blocksize * 8) { 1679 printk (KERN_ERR 1680 "EXT4-fs: #blocks per group too big: %lu\n", 1681 sbi->s_blocks_per_group); 1682 goto failed_mount; 1683 } 1684 if (sbi->s_frags_per_group > blocksize * 8) { 1685 printk (KERN_ERR 1686 "EXT4-fs: #fragments per group too big: %lu\n", 1687 sbi->s_frags_per_group); 1688 goto failed_mount; 1689 } 1690 if (sbi->s_inodes_per_group > blocksize * 8) { 1691 printk (KERN_ERR 1692 "EXT4-fs: #inodes per group too big: %lu\n", 1693 sbi->s_inodes_per_group); 1694 goto failed_mount; 1695 } 1696 1697 if (ext4_blocks_count(es) > 1698 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1699 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 1700 " too large to mount safely\n", sb->s_id); 1701 if (sizeof(sector_t) < 8) 1702 printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " 1703 "enabled\n"); 1704 goto failed_mount; 1705 } 1706 1707 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 1708 goto cantfind_ext4; 1709 blocks_count = (ext4_blocks_count(es) - 1710 le32_to_cpu(es->s_first_data_block) + 1711 EXT4_BLOCKS_PER_GROUP(sb) - 1); 1712 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 1713 sbi->s_groups_count = blocks_count; 1714 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 1715 EXT4_DESC_PER_BLOCK(sb); 1716 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1717 GFP_KERNEL); 1718 if (sbi->s_group_desc == NULL) { 1719 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 1720 goto failed_mount; 1721 } 1722 1723 bgl_lock_init(&sbi->s_blockgroup_lock); 1724 1725 for (i = 0; i < db_count; i++) { 1726 block = descriptor_loc(sb, logical_sb_block, i); 1727 sbi->s_group_desc[i] = sb_bread(sb, block); 1728 if (!sbi->s_group_desc[i]) { 1729 printk (KERN_ERR "EXT4-fs: " 1730 "can't read group descriptor %d\n", i); 1731 db_count = i; 1732 goto failed_mount2; 1733 } 1734 } 1735 if (!ext4_check_descriptors (sb)) { 1736 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 1737 goto failed_mount2; 1738 } 1739 sbi->s_gdb_count = db_count; 1740 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1741 spin_lock_init(&sbi->s_next_gen_lock); 1742 1743 percpu_counter_init(&sbi->s_freeblocks_counter, 1744 ext4_count_free_blocks(sb)); 1745 percpu_counter_init(&sbi->s_freeinodes_counter, 1746 ext4_count_free_inodes(sb)); 1747 percpu_counter_init(&sbi->s_dirs_counter, 1748 ext4_count_dirs(sb)); 1749 1750 /* per fileystem reservation list head & lock */ 1751 spin_lock_init(&sbi->s_rsv_window_lock); 1752 sbi->s_rsv_window_root = RB_ROOT; 1753 /* Add a single, static dummy reservation to the start of the 1754 * reservation window list --- it gives us a placeholder for 1755 * append-at-start-of-list which makes the allocation logic 1756 * _much_ simpler. */ 1757 sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; 1758 sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; 1759 sbi->s_rsv_window_head.rsv_alloc_hit = 0; 1760 sbi->s_rsv_window_head.rsv_goal_size = 0; 1761 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); 1762 1763 /* 1764 * set up enough so that it can read an inode 1765 */ 1766 sb->s_op = &ext4_sops; 1767 sb->s_export_op = &ext4_export_ops; 1768 sb->s_xattr = ext4_xattr_handlers; 1769 #ifdef CONFIG_QUOTA 1770 sb->s_qcop = &ext4_qctl_operations; 1771 sb->dq_op = &ext4_quota_operations; 1772 #endif 1773 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1774 1775 sb->s_root = NULL; 1776 1777 needs_recovery = (es->s_last_orphan != 0 || 1778 EXT4_HAS_INCOMPAT_FEATURE(sb, 1779 EXT4_FEATURE_INCOMPAT_RECOVER)); 1780 1781 /* 1782 * The first inode we look at is the journal inode. Don't try 1783 * root first: it may be modified in the journal! 1784 */ 1785 if (!test_opt(sb, NOLOAD) && 1786 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 1787 if (ext4_load_journal(sb, es, journal_devnum)) 1788 goto failed_mount3; 1789 } else if (journal_inum) { 1790 if (ext4_create_journal(sb, es, journal_inum)) 1791 goto failed_mount3; 1792 } else { 1793 if (!silent) 1794 printk (KERN_ERR 1795 "ext4: No journal on filesystem on %s\n", 1796 sb->s_id); 1797 goto failed_mount3; 1798 } 1799 1800 /* We have now updated the journal if required, so we can 1801 * validate the data journaling mode. */ 1802 switch (test_opt(sb, DATA_FLAGS)) { 1803 case 0: 1804 /* No mode set, assume a default based on the journal 1805 * capabilities: ORDERED_DATA if the journal can 1806 * cope, else JOURNAL_DATA 1807 */ 1808 if (jbd2_journal_check_available_features 1809 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 1810 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1811 else 1812 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1813 break; 1814 1815 case EXT4_MOUNT_ORDERED_DATA: 1816 case EXT4_MOUNT_WRITEBACK_DATA: 1817 if (!jbd2_journal_check_available_features 1818 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 1819 printk(KERN_ERR "EXT4-fs: Journal does not support " 1820 "requested data journaling mode\n"); 1821 goto failed_mount4; 1822 } 1823 default: 1824 break; 1825 } 1826 1827 if (test_opt(sb, NOBH)) { 1828 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 1829 printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " 1830 "its supported only with writeback mode\n"); 1831 clear_opt(sbi->s_mount_opt, NOBH); 1832 } 1833 } 1834 /* 1835 * The jbd2_journal_load will have done any necessary log recovery, 1836 * so we can safely mount the rest of the filesystem now. 1837 */ 1838 1839 root = iget(sb, EXT4_ROOT_INO); 1840 sb->s_root = d_alloc_root(root); 1841 if (!sb->s_root) { 1842 printk(KERN_ERR "EXT4-fs: get root inode failed\n"); 1843 iput(root); 1844 goto failed_mount4; 1845 } 1846 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1847 dput(sb->s_root); 1848 sb->s_root = NULL; 1849 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); 1850 goto failed_mount4; 1851 } 1852 1853 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1854 /* 1855 * akpm: core read_super() calls in here with the superblock locked. 1856 * That deadlocks, because orphan cleanup needs to lock the superblock 1857 * in numerous places. Here we just pop the lock - it's relatively 1858 * harmless, because we are now ready to accept write_super() requests, 1859 * and aviro says that's the only reason for hanging onto the 1860 * superblock lock. 1861 */ 1862 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 1863 ext4_orphan_cleanup(sb, es); 1864 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 1865 if (needs_recovery) 1866 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 1867 ext4_mark_recovery_complete(sb, es); 1868 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 1869 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 1870 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 1871 "writeback"); 1872 1873 ext4_ext_init(sb); 1874 1875 lock_kernel(); 1876 return 0; 1877 1878 cantfind_ext4: 1879 if (!silent) 1880 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", 1881 sb->s_id); 1882 goto failed_mount; 1883 1884 failed_mount4: 1885 jbd2_journal_destroy(sbi->s_journal); 1886 failed_mount3: 1887 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1888 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1889 percpu_counter_destroy(&sbi->s_dirs_counter); 1890 failed_mount2: 1891 for (i = 0; i < db_count; i++) 1892 brelse(sbi->s_group_desc[i]); 1893 kfree(sbi->s_group_desc); 1894 failed_mount: 1895 #ifdef CONFIG_QUOTA 1896 for (i = 0; i < MAXQUOTAS; i++) 1897 kfree(sbi->s_qf_names[i]); 1898 #endif 1899 ext4_blkdev_remove(sbi); 1900 brelse(bh); 1901 out_fail: 1902 sb->s_fs_info = NULL; 1903 kfree(sbi); 1904 lock_kernel(); 1905 return -EINVAL; 1906 } 1907 1908 /* 1909 * Setup any per-fs journal parameters now. We'll do this both on 1910 * initial mount, once the journal has been initialised but before we've 1911 * done any recovery; and again on any subsequent remount. 1912 */ 1913 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 1914 { 1915 struct ext4_sb_info *sbi = EXT4_SB(sb); 1916 1917 if (sbi->s_commit_interval) 1918 journal->j_commit_interval = sbi->s_commit_interval; 1919 /* We could also set up an ext4-specific default for the commit 1920 * interval here, but for now we'll just fall back to the jbd 1921 * default. */ 1922 1923 spin_lock(&journal->j_state_lock); 1924 if (test_opt(sb, BARRIER)) 1925 journal->j_flags |= JBD2_BARRIER; 1926 else 1927 journal->j_flags &= ~JBD2_BARRIER; 1928 spin_unlock(&journal->j_state_lock); 1929 } 1930 1931 static journal_t *ext4_get_journal(struct super_block *sb, 1932 unsigned int journal_inum) 1933 { 1934 struct inode *journal_inode; 1935 journal_t *journal; 1936 1937 /* First, test for the existence of a valid inode on disk. Bad 1938 * things happen if we iget() an unused inode, as the subsequent 1939 * iput() will try to delete it. */ 1940 1941 journal_inode = iget(sb, journal_inum); 1942 if (!journal_inode) { 1943 printk(KERN_ERR "EXT4-fs: no journal found.\n"); 1944 return NULL; 1945 } 1946 if (!journal_inode->i_nlink) { 1947 make_bad_inode(journal_inode); 1948 iput(journal_inode); 1949 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); 1950 return NULL; 1951 } 1952 1953 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 1954 journal_inode, journal_inode->i_size); 1955 if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { 1956 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 1957 iput(journal_inode); 1958 return NULL; 1959 } 1960 1961 journal = jbd2_journal_init_inode(journal_inode); 1962 if (!journal) { 1963 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); 1964 iput(journal_inode); 1965 return NULL; 1966 } 1967 journal->j_private = sb; 1968 ext4_init_journal_params(sb, journal); 1969 return journal; 1970 } 1971 1972 static journal_t *ext4_get_dev_journal(struct super_block *sb, 1973 dev_t j_dev) 1974 { 1975 struct buffer_head * bh; 1976 journal_t *journal; 1977 ext4_fsblk_t start; 1978 ext4_fsblk_t len; 1979 int hblock, blocksize; 1980 ext4_fsblk_t sb_block; 1981 unsigned long offset; 1982 struct ext4_super_block * es; 1983 struct block_device *bdev; 1984 1985 bdev = ext4_blkdev_get(j_dev); 1986 if (bdev == NULL) 1987 return NULL; 1988 1989 if (bd_claim(bdev, sb)) { 1990 printk(KERN_ERR 1991 "EXT4: failed to claim external journal device.\n"); 1992 blkdev_put(bdev); 1993 return NULL; 1994 } 1995 1996 blocksize = sb->s_blocksize; 1997 hblock = bdev_hardsect_size(bdev); 1998 if (blocksize < hblock) { 1999 printk(KERN_ERR 2000 "EXT4-fs: blocksize too small for journal device.\n"); 2001 goto out_bdev; 2002 } 2003 2004 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 2005 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 2006 set_blocksize(bdev, blocksize); 2007 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2008 printk(KERN_ERR "EXT4-fs: couldn't read superblock of " 2009 "external journal\n"); 2010 goto out_bdev; 2011 } 2012 2013 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2014 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 2015 !(le32_to_cpu(es->s_feature_incompat) & 2016 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2017 printk(KERN_ERR "EXT4-fs: external journal has " 2018 "bad superblock\n"); 2019 brelse(bh); 2020 goto out_bdev; 2021 } 2022 2023 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2024 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); 2025 brelse(bh); 2026 goto out_bdev; 2027 } 2028 2029 len = ext4_blocks_count(es); 2030 start = sb_block + 1; 2031 brelse(bh); /* we're done with the superblock */ 2032 2033 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 2034 start, len, blocksize); 2035 if (!journal) { 2036 printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); 2037 goto out_bdev; 2038 } 2039 journal->j_private = sb; 2040 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2041 wait_on_buffer(journal->j_sb_buffer); 2042 if (!buffer_uptodate(journal->j_sb_buffer)) { 2043 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); 2044 goto out_journal; 2045 } 2046 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2047 printk(KERN_ERR "EXT4-fs: External journal has more than one " 2048 "user (unsupported) - %d\n", 2049 be32_to_cpu(journal->j_superblock->s_nr_users)); 2050 goto out_journal; 2051 } 2052 EXT4_SB(sb)->journal_bdev = bdev; 2053 ext4_init_journal_params(sb, journal); 2054 return journal; 2055 out_journal: 2056 jbd2_journal_destroy(journal); 2057 out_bdev: 2058 ext4_blkdev_put(bdev); 2059 return NULL; 2060 } 2061 2062 static int ext4_load_journal(struct super_block *sb, 2063 struct ext4_super_block *es, 2064 unsigned long journal_devnum) 2065 { 2066 journal_t *journal; 2067 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 2068 dev_t journal_dev; 2069 int err = 0; 2070 int really_read_only; 2071 2072 if (journal_devnum && 2073 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2074 printk(KERN_INFO "EXT4-fs: external journal device major/minor " 2075 "numbers have changed\n"); 2076 journal_dev = new_decode_dev(journal_devnum); 2077 } else 2078 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2079 2080 really_read_only = bdev_read_only(sb->s_bdev); 2081 2082 /* 2083 * Are we loading a blank journal or performing recovery after a 2084 * crash? For recovery, we need to check in advance whether we 2085 * can get read-write access to the device. 2086 */ 2087 2088 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2089 if (sb->s_flags & MS_RDONLY) { 2090 printk(KERN_INFO "EXT4-fs: INFO: recovery " 2091 "required on readonly filesystem.\n"); 2092 if (really_read_only) { 2093 printk(KERN_ERR "EXT4-fs: write access " 2094 "unavailable, cannot proceed.\n"); 2095 return -EROFS; 2096 } 2097 printk (KERN_INFO "EXT4-fs: write access will " 2098 "be enabled during recovery.\n"); 2099 } 2100 } 2101 2102 if (journal_inum && journal_dev) { 2103 printk(KERN_ERR "EXT4-fs: filesystem has both journal " 2104 "and inode journals!\n"); 2105 return -EINVAL; 2106 } 2107 2108 if (journal_inum) { 2109 if (!(journal = ext4_get_journal(sb, journal_inum))) 2110 return -EINVAL; 2111 } else { 2112 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 2113 return -EINVAL; 2114 } 2115 2116 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2117 err = jbd2_journal_update_format(journal); 2118 if (err) { 2119 printk(KERN_ERR "EXT4-fs: error updating journal.\n"); 2120 jbd2_journal_destroy(journal); 2121 return err; 2122 } 2123 } 2124 2125 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 2126 err = jbd2_journal_wipe(journal, !really_read_only); 2127 if (!err) 2128 err = jbd2_journal_load(journal); 2129 2130 if (err) { 2131 printk(KERN_ERR "EXT4-fs: error loading journal.\n"); 2132 jbd2_journal_destroy(journal); 2133 return err; 2134 } 2135 2136 EXT4_SB(sb)->s_journal = journal; 2137 ext4_clear_journal_err(sb, es); 2138 2139 if (journal_devnum && 2140 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2141 es->s_journal_dev = cpu_to_le32(journal_devnum); 2142 sb->s_dirt = 1; 2143 2144 /* Make sure we flush the recovery flag to disk. */ 2145 ext4_commit_super(sb, es, 1); 2146 } 2147 2148 return 0; 2149 } 2150 2151 static int ext4_create_journal(struct super_block * sb, 2152 struct ext4_super_block * es, 2153 unsigned int journal_inum) 2154 { 2155 journal_t *journal; 2156 2157 if (sb->s_flags & MS_RDONLY) { 2158 printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " 2159 "create journal.\n"); 2160 return -EROFS; 2161 } 2162 2163 if (!(journal = ext4_get_journal(sb, journal_inum))) 2164 return -EINVAL; 2165 2166 printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", 2167 journal_inum); 2168 2169 if (jbd2_journal_create(journal)) { 2170 printk(KERN_ERR "EXT4-fs: error creating journal.\n"); 2171 jbd2_journal_destroy(journal); 2172 return -EIO; 2173 } 2174 2175 EXT4_SB(sb)->s_journal = journal; 2176 2177 ext4_update_dynamic_rev(sb); 2178 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2179 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); 2180 2181 es->s_journal_inum = cpu_to_le32(journal_inum); 2182 sb->s_dirt = 1; 2183 2184 /* Make sure we flush the recovery flag to disk. */ 2185 ext4_commit_super(sb, es, 1); 2186 2187 return 0; 2188 } 2189 2190 static void ext4_commit_super (struct super_block * sb, 2191 struct ext4_super_block * es, 2192 int sync) 2193 { 2194 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2195 2196 if (!sbh) 2197 return; 2198 es->s_wtime = cpu_to_le32(get_seconds()); 2199 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2200 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2201 BUFFER_TRACE(sbh, "marking dirty"); 2202 mark_buffer_dirty(sbh); 2203 if (sync) 2204 sync_dirty_buffer(sbh); 2205 } 2206 2207 2208 /* 2209 * Have we just finished recovery? If so, and if we are mounting (or 2210 * remounting) the filesystem readonly, then we will end up with a 2211 * consistent fs on disk. Record that fact. 2212 */ 2213 static void ext4_mark_recovery_complete(struct super_block * sb, 2214 struct ext4_super_block * es) 2215 { 2216 journal_t *journal = EXT4_SB(sb)->s_journal; 2217 2218 jbd2_journal_lock_updates(journal); 2219 jbd2_journal_flush(journal); 2220 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2221 sb->s_flags & MS_RDONLY) { 2222 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2223 sb->s_dirt = 0; 2224 ext4_commit_super(sb, es, 1); 2225 } 2226 jbd2_journal_unlock_updates(journal); 2227 } 2228 2229 /* 2230 * If we are mounting (or read-write remounting) a filesystem whose journal 2231 * has recorded an error from a previous lifetime, move that error to the 2232 * main filesystem now. 2233 */ 2234 static void ext4_clear_journal_err(struct super_block * sb, 2235 struct ext4_super_block * es) 2236 { 2237 journal_t *journal; 2238 int j_errno; 2239 const char *errstr; 2240 2241 journal = EXT4_SB(sb)->s_journal; 2242 2243 /* 2244 * Now check for any error status which may have been recorded in the 2245 * journal by a prior ext4_error() or ext4_abort() 2246 */ 2247 2248 j_errno = jbd2_journal_errno(journal); 2249 if (j_errno) { 2250 char nbuf[16]; 2251 2252 errstr = ext4_decode_error(sb, j_errno, nbuf); 2253 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2254 "from previous mount: %s", errstr); 2255 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2256 "filesystem check."); 2257 2258 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2259 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2260 ext4_commit_super (sb, es, 1); 2261 2262 jbd2_journal_clear_err(journal); 2263 } 2264 } 2265 2266 /* 2267 * Force the running and committing transactions to commit, 2268 * and wait on the commit. 2269 */ 2270 int ext4_force_commit(struct super_block *sb) 2271 { 2272 journal_t *journal; 2273 int ret; 2274 2275 if (sb->s_flags & MS_RDONLY) 2276 return 0; 2277 2278 journal = EXT4_SB(sb)->s_journal; 2279 sb->s_dirt = 0; 2280 ret = ext4_journal_force_commit(journal); 2281 return ret; 2282 } 2283 2284 /* 2285 * Ext4 always journals updates to the superblock itself, so we don't 2286 * have to propagate any other updates to the superblock on disk at this 2287 * point. Just start an async writeback to get the buffers on their way 2288 * to the disk. 2289 * 2290 * This implicitly triggers the writebehind on sync(). 2291 */ 2292 2293 static void ext4_write_super (struct super_block * sb) 2294 { 2295 if (mutex_trylock(&sb->s_lock) != 0) 2296 BUG(); 2297 sb->s_dirt = 0; 2298 } 2299 2300 static int ext4_sync_fs(struct super_block *sb, int wait) 2301 { 2302 tid_t target; 2303 2304 sb->s_dirt = 0; 2305 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2306 if (wait) 2307 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2308 } 2309 return 0; 2310 } 2311 2312 /* 2313 * LVM calls this function before a (read-only) snapshot is created. This 2314 * gives us a chance to flush the journal completely and mark the fs clean. 2315 */ 2316 static void ext4_write_super_lockfs(struct super_block *sb) 2317 { 2318 sb->s_dirt = 0; 2319 2320 if (!(sb->s_flags & MS_RDONLY)) { 2321 journal_t *journal = EXT4_SB(sb)->s_journal; 2322 2323 /* Now we set up the journal barrier. */ 2324 jbd2_journal_lock_updates(journal); 2325 jbd2_journal_flush(journal); 2326 2327 /* Journal blocked and flushed, clear needs_recovery flag. */ 2328 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2329 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2330 } 2331 } 2332 2333 /* 2334 * Called by LVM after the snapshot is done. We need to reset the RECOVER 2335 * flag here, even though the filesystem is not technically dirty yet. 2336 */ 2337 static void ext4_unlockfs(struct super_block *sb) 2338 { 2339 if (!(sb->s_flags & MS_RDONLY)) { 2340 lock_super(sb); 2341 /* Reser the needs_recovery flag before the fs is unlocked. */ 2342 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2343 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 2344 unlock_super(sb); 2345 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 2346 } 2347 } 2348 2349 static int ext4_remount (struct super_block * sb, int * flags, char * data) 2350 { 2351 struct ext4_super_block * es; 2352 struct ext4_sb_info *sbi = EXT4_SB(sb); 2353 ext4_fsblk_t n_blocks_count = 0; 2354 unsigned long old_sb_flags; 2355 struct ext4_mount_options old_opts; 2356 int err; 2357 #ifdef CONFIG_QUOTA 2358 int i; 2359 #endif 2360 2361 /* Store the original options */ 2362 old_sb_flags = sb->s_flags; 2363 old_opts.s_mount_opt = sbi->s_mount_opt; 2364 old_opts.s_resuid = sbi->s_resuid; 2365 old_opts.s_resgid = sbi->s_resgid; 2366 old_opts.s_commit_interval = sbi->s_commit_interval; 2367 #ifdef CONFIG_QUOTA 2368 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2369 for (i = 0; i < MAXQUOTAS; i++) 2370 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2371 #endif 2372 2373 /* 2374 * Allow the "check" option to be passed as a remount option. 2375 */ 2376 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { 2377 err = -EINVAL; 2378 goto restore_opts; 2379 } 2380 2381 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2382 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2383 2384 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2385 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2386 2387 es = sbi->s_es; 2388 2389 ext4_init_journal_params(sb, sbi->s_journal); 2390 2391 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2392 n_blocks_count > ext4_blocks_count(es)) { 2393 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 2394 err = -EROFS; 2395 goto restore_opts; 2396 } 2397 2398 if (*flags & MS_RDONLY) { 2399 /* 2400 * First of all, the unconditional stuff we have to do 2401 * to disable replay of the journal when we next remount 2402 */ 2403 sb->s_flags |= MS_RDONLY; 2404 2405 /* 2406 * OK, test if we are remounting a valid rw partition 2407 * readonly, and if so set the rdonly flag and then 2408 * mark the partition as valid again. 2409 */ 2410 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 2411 (sbi->s_mount_state & EXT4_VALID_FS)) 2412 es->s_state = cpu_to_le16(sbi->s_mount_state); 2413 2414 ext4_mark_recovery_complete(sb, es); 2415 } else { 2416 __le32 ret; 2417 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2418 ~EXT4_FEATURE_RO_COMPAT_SUPP))) { 2419 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 2420 "remount RDWR because of unsupported " 2421 "optional features (%x).\n", 2422 sb->s_id, le32_to_cpu(ret)); 2423 err = -EROFS; 2424 goto restore_opts; 2425 } 2426 2427 /* 2428 * If we have an unprocessed orphan list hanging 2429 * around from a previously readonly bdev mount, 2430 * require a full umount/remount for now. 2431 */ 2432 if (es->s_last_orphan) { 2433 printk(KERN_WARNING "EXT4-fs: %s: couldn't " 2434 "remount RDWR because of unprocessed " 2435 "orphan inode list. Please " 2436 "umount/remount instead.\n", 2437 sb->s_id); 2438 err = -EINVAL; 2439 goto restore_opts; 2440 } 2441 2442 /* 2443 * Mounting a RDONLY partition read-write, so reread 2444 * and store the current valid flag. (It may have 2445 * been changed by e2fsck since we originally mounted 2446 * the partition.) 2447 */ 2448 ext4_clear_journal_err(sb, es); 2449 sbi->s_mount_state = le16_to_cpu(es->s_state); 2450 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 2451 goto restore_opts; 2452 if (!ext4_setup_super (sb, es, 0)) 2453 sb->s_flags &= ~MS_RDONLY; 2454 } 2455 } 2456 #ifdef CONFIG_QUOTA 2457 /* Release old quota file names */ 2458 for (i = 0; i < MAXQUOTAS; i++) 2459 if (old_opts.s_qf_names[i] && 2460 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2461 kfree(old_opts.s_qf_names[i]); 2462 #endif 2463 return 0; 2464 restore_opts: 2465 sb->s_flags = old_sb_flags; 2466 sbi->s_mount_opt = old_opts.s_mount_opt; 2467 sbi->s_resuid = old_opts.s_resuid; 2468 sbi->s_resgid = old_opts.s_resgid; 2469 sbi->s_commit_interval = old_opts.s_commit_interval; 2470 #ifdef CONFIG_QUOTA 2471 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2472 for (i = 0; i < MAXQUOTAS; i++) { 2473 if (sbi->s_qf_names[i] && 2474 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 2475 kfree(sbi->s_qf_names[i]); 2476 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2477 } 2478 #endif 2479 return err; 2480 } 2481 2482 static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 2483 { 2484 struct super_block *sb = dentry->d_sb; 2485 struct ext4_sb_info *sbi = EXT4_SB(sb); 2486 struct ext4_super_block *es = sbi->s_es; 2487 ext4_fsblk_t overhead; 2488 int i; 2489 u64 fsid; 2490 2491 if (test_opt (sb, MINIX_DF)) 2492 overhead = 0; 2493 else { 2494 unsigned long ngroups; 2495 ngroups = EXT4_SB(sb)->s_groups_count; 2496 smp_rmb(); 2497 2498 /* 2499 * Compute the overhead (FS structures) 2500 */ 2501 2502 /* 2503 * All of the blocks before first_data_block are 2504 * overhead 2505 */ 2506 overhead = le32_to_cpu(es->s_first_data_block); 2507 2508 /* 2509 * Add the overhead attributed to the superblock and 2510 * block group descriptors. If the sparse superblocks 2511 * feature is turned on, then not all groups have this. 2512 */ 2513 for (i = 0; i < ngroups; i++) { 2514 overhead += ext4_bg_has_super(sb, i) + 2515 ext4_bg_num_gdb(sb, i); 2516 cond_resched(); 2517 } 2518 2519 /* 2520 * Every block group has an inode bitmap, a block 2521 * bitmap, and an inode table. 2522 */ 2523 overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); 2524 } 2525 2526 buf->f_type = EXT4_SUPER_MAGIC; 2527 buf->f_bsize = sb->s_blocksize; 2528 buf->f_blocks = ext4_blocks_count(es) - overhead; 2529 buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); 2530 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 2531 if (buf->f_bfree < ext4_r_blocks_count(es)) 2532 buf->f_bavail = 0; 2533 buf->f_files = le32_to_cpu(es->s_inodes_count); 2534 buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); 2535 buf->f_namelen = EXT4_NAME_LEN; 2536 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2537 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2538 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 2539 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 2540 return 0; 2541 } 2542 2543 /* Helper function for writing quotas on sync - we need to start transaction before quota file 2544 * is locked for write. Otherwise the are possible deadlocks: 2545 * Process 1 Process 2 2546 * ext4_create() quota_sync() 2547 * jbd2_journal_start() write_dquot() 2548 * DQUOT_INIT() down(dqio_mutex) 2549 * down(dqio_mutex) jbd2_journal_start() 2550 * 2551 */ 2552 2553 #ifdef CONFIG_QUOTA 2554 2555 static inline struct inode *dquot_to_inode(struct dquot *dquot) 2556 { 2557 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2558 } 2559 2560 static int ext4_dquot_initialize(struct inode *inode, int type) 2561 { 2562 handle_t *handle; 2563 int ret, err; 2564 2565 /* We may create quota structure so we need to reserve enough blocks */ 2566 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); 2567 if (IS_ERR(handle)) 2568 return PTR_ERR(handle); 2569 ret = dquot_initialize(inode, type); 2570 err = ext4_journal_stop(handle); 2571 if (!ret) 2572 ret = err; 2573 return ret; 2574 } 2575 2576 static int ext4_dquot_drop(struct inode *inode) 2577 { 2578 handle_t *handle; 2579 int ret, err; 2580 2581 /* We may delete quota structure so we need to reserve enough blocks */ 2582 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 2583 if (IS_ERR(handle)) 2584 return PTR_ERR(handle); 2585 ret = dquot_drop(inode); 2586 err = ext4_journal_stop(handle); 2587 if (!ret) 2588 ret = err; 2589 return ret; 2590 } 2591 2592 static int ext4_write_dquot(struct dquot *dquot) 2593 { 2594 int ret, err; 2595 handle_t *handle; 2596 struct inode *inode; 2597 2598 inode = dquot_to_inode(dquot); 2599 handle = ext4_journal_start(inode, 2600 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2601 if (IS_ERR(handle)) 2602 return PTR_ERR(handle); 2603 ret = dquot_commit(dquot); 2604 err = ext4_journal_stop(handle); 2605 if (!ret) 2606 ret = err; 2607 return ret; 2608 } 2609 2610 static int ext4_acquire_dquot(struct dquot *dquot) 2611 { 2612 int ret, err; 2613 handle_t *handle; 2614 2615 handle = ext4_journal_start(dquot_to_inode(dquot), 2616 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2617 if (IS_ERR(handle)) 2618 return PTR_ERR(handle); 2619 ret = dquot_acquire(dquot); 2620 err = ext4_journal_stop(handle); 2621 if (!ret) 2622 ret = err; 2623 return ret; 2624 } 2625 2626 static int ext4_release_dquot(struct dquot *dquot) 2627 { 2628 int ret, err; 2629 handle_t *handle; 2630 2631 handle = ext4_journal_start(dquot_to_inode(dquot), 2632 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2633 if (IS_ERR(handle)) 2634 return PTR_ERR(handle); 2635 ret = dquot_release(dquot); 2636 err = ext4_journal_stop(handle); 2637 if (!ret) 2638 ret = err; 2639 return ret; 2640 } 2641 2642 static int ext4_mark_dquot_dirty(struct dquot *dquot) 2643 { 2644 /* Are we journalling quotas? */ 2645 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2646 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2647 dquot_mark_dquot_dirty(dquot); 2648 return ext4_write_dquot(dquot); 2649 } else { 2650 return dquot_mark_dquot_dirty(dquot); 2651 } 2652 } 2653 2654 static int ext4_write_info(struct super_block *sb, int type) 2655 { 2656 int ret, err; 2657 handle_t *handle; 2658 2659 /* Data block + inode block */ 2660 handle = ext4_journal_start(sb->s_root->d_inode, 2); 2661 if (IS_ERR(handle)) 2662 return PTR_ERR(handle); 2663 ret = dquot_commit_info(sb, type); 2664 err = ext4_journal_stop(handle); 2665 if (!ret) 2666 ret = err; 2667 return ret; 2668 } 2669 2670 /* 2671 * Turn on quotas during mount time - we need to find 2672 * the quota file and such... 2673 */ 2674 static int ext4_quota_on_mount(struct super_block *sb, int type) 2675 { 2676 return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 2677 EXT4_SB(sb)->s_jquota_fmt, type); 2678 } 2679 2680 /* 2681 * Standard function to be called on quota_on 2682 */ 2683 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 2684 char *path) 2685 { 2686 int err; 2687 struct nameidata nd; 2688 2689 if (!test_opt(sb, QUOTA)) 2690 return -EINVAL; 2691 /* Not journalling quota? */ 2692 if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 2693 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) 2694 return vfs_quota_on(sb, type, format_id, path); 2695 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2696 if (err) 2697 return err; 2698 /* Quotafile not on the same filesystem? */ 2699 if (nd.mnt->mnt_sb != sb) { 2700 path_release(&nd); 2701 return -EXDEV; 2702 } 2703 /* Quotafile not of fs root? */ 2704 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) 2705 printk(KERN_WARNING 2706 "EXT4-fs: Quota file not on filesystem root. " 2707 "Journalled quota will not work.\n"); 2708 path_release(&nd); 2709 return vfs_quota_on(sb, type, format_id, path); 2710 } 2711 2712 /* Read data from quotafile - avoid pagecache and such because we cannot afford 2713 * acquiring the locks... As quota files are never truncated and quota code 2714 * itself serializes the operations (and noone else should touch the files) 2715 * we don't have to be afraid of races */ 2716 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 2717 size_t len, loff_t off) 2718 { 2719 struct inode *inode = sb_dqopt(sb)->files[type]; 2720 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 2721 int err = 0; 2722 int offset = off & (sb->s_blocksize - 1); 2723 int tocopy; 2724 size_t toread; 2725 struct buffer_head *bh; 2726 loff_t i_size = i_size_read(inode); 2727 2728 if (off > i_size) 2729 return 0; 2730 if (off+len > i_size) 2731 len = i_size-off; 2732 toread = len; 2733 while (toread > 0) { 2734 tocopy = sb->s_blocksize - offset < toread ? 2735 sb->s_blocksize - offset : toread; 2736 bh = ext4_bread(NULL, inode, blk, 0, &err); 2737 if (err) 2738 return err; 2739 if (!bh) /* A hole? */ 2740 memset(data, 0, tocopy); 2741 else 2742 memcpy(data, bh->b_data+offset, tocopy); 2743 brelse(bh); 2744 offset = 0; 2745 toread -= tocopy; 2746 data += tocopy; 2747 blk++; 2748 } 2749 return len; 2750 } 2751 2752 /* Write to quotafile (we know the transaction is already started and has 2753 * enough credits) */ 2754 static ssize_t ext4_quota_write(struct super_block *sb, int type, 2755 const char *data, size_t len, loff_t off) 2756 { 2757 struct inode *inode = sb_dqopt(sb)->files[type]; 2758 sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 2759 int err = 0; 2760 int offset = off & (sb->s_blocksize - 1); 2761 int tocopy; 2762 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 2763 size_t towrite = len; 2764 struct buffer_head *bh; 2765 handle_t *handle = journal_current_handle(); 2766 2767 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 2768 while (towrite > 0) { 2769 tocopy = sb->s_blocksize - offset < towrite ? 2770 sb->s_blocksize - offset : towrite; 2771 bh = ext4_bread(handle, inode, blk, 1, &err); 2772 if (!bh) 2773 goto out; 2774 if (journal_quota) { 2775 err = ext4_journal_get_write_access(handle, bh); 2776 if (err) { 2777 brelse(bh); 2778 goto out; 2779 } 2780 } 2781 lock_buffer(bh); 2782 memcpy(bh->b_data+offset, data, tocopy); 2783 flush_dcache_page(bh->b_page); 2784 unlock_buffer(bh); 2785 if (journal_quota) 2786 err = ext4_journal_dirty_metadata(handle, bh); 2787 else { 2788 /* Always do at least ordered writes for quotas */ 2789 err = ext4_journal_dirty_data(handle, bh); 2790 mark_buffer_dirty(bh); 2791 } 2792 brelse(bh); 2793 if (err) 2794 goto out; 2795 offset = 0; 2796 towrite -= tocopy; 2797 data += tocopy; 2798 blk++; 2799 } 2800 out: 2801 if (len == towrite) 2802 return err; 2803 if (inode->i_size < off+len-towrite) { 2804 i_size_write(inode, off+len-towrite); 2805 EXT4_I(inode)->i_disksize = inode->i_size; 2806 } 2807 inode->i_version++; 2808 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2809 ext4_mark_inode_dirty(handle, inode); 2810 mutex_unlock(&inode->i_mutex); 2811 return len - towrite; 2812 } 2813 2814 #endif 2815 2816 static int ext4_get_sb(struct file_system_type *fs_type, 2817 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 2818 { 2819 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 2820 } 2821 2822 static struct file_system_type ext4dev_fs_type = { 2823 .owner = THIS_MODULE, 2824 .name = "ext4dev", 2825 .get_sb = ext4_get_sb, 2826 .kill_sb = kill_block_super, 2827 .fs_flags = FS_REQUIRES_DEV, 2828 }; 2829 2830 static int __init init_ext4_fs(void) 2831 { 2832 int err = init_ext4_xattr(); 2833 if (err) 2834 return err; 2835 err = init_inodecache(); 2836 if (err) 2837 goto out1; 2838 err = register_filesystem(&ext4dev_fs_type); 2839 if (err) 2840 goto out; 2841 return 0; 2842 out: 2843 destroy_inodecache(); 2844 out1: 2845 exit_ext4_xattr(); 2846 return err; 2847 } 2848 2849 static void __exit exit_ext4_fs(void) 2850 { 2851 unregister_filesystem(&ext4dev_fs_type); 2852 destroy_inodecache(); 2853 exit_ext4_xattr(); 2854 } 2855 2856 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 2857 MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); 2858 MODULE_LICENSE("GPL"); 2859 module_init(init_ext4_fs) 2860 module_exit(exit_ext4_fs) 2861