1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/smp_lock.h> 30 #include <linux/buffer_head.h> 31 #include <linux/exportfs.h> 32 #include <linux/vfs.h> 33 #include <linux/random.h> 34 #include <linux/mount.h> 35 #include <linux/namei.h> 36 #include <linux/quotaops.h> 37 #include <linux/seq_file.h> 38 #include <linux/proc_fs.h> 39 #include <linux/ctype.h> 40 #include <linux/log2.h> 41 #include <linux/crc16.h> 42 #include <asm/uaccess.h> 43 44 #include "ext4.h" 45 #include "ext4_jbd2.h" 46 #include "xattr.h" 47 #include "acl.h" 48 #include "mballoc.h" 49 50 #define CREATE_TRACE_POINTS 51 #include <trace/events/ext4.h> 52 53 struct proc_dir_entry *ext4_proc_root; 54 static struct kset *ext4_kset; 55 56 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 57 unsigned long journal_devnum); 58 static int ext4_commit_super(struct super_block *sb, int sync); 59 static void ext4_mark_recovery_complete(struct super_block *sb, 60 struct ext4_super_block *es); 61 static void ext4_clear_journal_err(struct super_block *sb, 62 struct ext4_super_block *es); 63 static int ext4_sync_fs(struct super_block *sb, int wait); 64 static const char *ext4_decode_error(struct super_block *sb, int errno, 65 char nbuf[16]); 66 static int ext4_remount(struct super_block *sb, int *flags, char *data); 67 static 
int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 68 static int ext4_unfreeze(struct super_block *sb); 69 static void ext4_write_super(struct super_block *sb); 70 static int ext4_freeze(struct super_block *sb); 71 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 72 const char *dev_name, void *data, struct vfsmount *mnt); 73 74 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 75 static struct file_system_type ext3_fs_type = { 76 .owner = THIS_MODULE, 77 .name = "ext3", 78 .get_sb = ext4_get_sb, 79 .kill_sb = kill_block_super, 80 .fs_flags = FS_REQUIRES_DEV, 81 }; 82 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 83 #else 84 #define IS_EXT3_SB(sb) (0) 85 #endif 86 87 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 88 struct ext4_group_desc *bg) 89 { 90 return le32_to_cpu(bg->bg_block_bitmap_lo) | 91 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 92 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 93 } 94 95 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 96 struct ext4_group_desc *bg) 97 { 98 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 99 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 100 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 101 } 102 103 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 104 struct ext4_group_desc *bg) 105 { 106 return le32_to_cpu(bg->bg_inode_table_lo) | 107 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 108 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 109 } 110 111 __u32 ext4_free_blks_count(struct super_block *sb, 112 struct ext4_group_desc *bg) 113 { 114 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 115 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 
116 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 117 } 118 119 __u32 ext4_free_inodes_count(struct super_block *sb, 120 struct ext4_group_desc *bg) 121 { 122 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 123 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 124 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 125 } 126 127 __u32 ext4_used_dirs_count(struct super_block *sb, 128 struct ext4_group_desc *bg) 129 { 130 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 131 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 132 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 133 } 134 135 __u32 ext4_itable_unused_count(struct super_block *sb, 136 struct ext4_group_desc *bg) 137 { 138 return le16_to_cpu(bg->bg_itable_unused_lo) | 139 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 140 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 141 } 142 143 void ext4_block_bitmap_set(struct super_block *sb, 144 struct ext4_group_desc *bg, ext4_fsblk_t blk) 145 { 146 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 147 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 148 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 149 } 150 151 void ext4_inode_bitmap_set(struct super_block *sb, 152 struct ext4_group_desc *bg, ext4_fsblk_t blk) 153 { 154 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 155 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 156 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 157 } 158 159 void ext4_inode_table_set(struct super_block *sb, 160 struct ext4_group_desc *bg, ext4_fsblk_t blk) 161 { 162 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 163 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 164 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 165 } 166 167 void ext4_free_blks_set(struct super_block *sb, 168 struct ext4_group_desc *bg, __u32 count) 169 { 170 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 171 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 172 bg->bg_free_blocks_count_hi = 
cpu_to_le16(count >> 16); 173 } 174 175 void ext4_free_inodes_set(struct super_block *sb, 176 struct ext4_group_desc *bg, __u32 count) 177 { 178 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 179 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 180 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 181 } 182 183 void ext4_used_dirs_set(struct super_block *sb, 184 struct ext4_group_desc *bg, __u32 count) 185 { 186 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 187 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 188 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 189 } 190 191 void ext4_itable_unused_set(struct super_block *sb, 192 struct ext4_group_desc *bg, __u32 count) 193 { 194 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 195 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 196 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 197 } 198 199 200 /* Just increment the non-pointer handle value */ 201 static handle_t *ext4_get_nojournal(void) 202 { 203 handle_t *handle = current->journal_info; 204 unsigned long ref_cnt = (unsigned long)handle; 205 206 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 207 208 ref_cnt++; 209 handle = (handle_t *)ref_cnt; 210 211 current->journal_info = handle; 212 return handle; 213 } 214 215 216 /* Decrement the non-pointer handle value */ 217 static void ext4_put_nojournal(handle_t *handle) 218 { 219 unsigned long ref_cnt = (unsigned long)handle; 220 221 BUG_ON(ref_cnt == 0); 222 223 ref_cnt--; 224 handle = (handle_t *)ref_cnt; 225 226 current->journal_info = handle; 227 } 228 229 /* 230 * Wrappers for jbd2_journal_start/end. 231 * 232 * The only special thing we need to do here is to make sure that all 233 * journal_end calls result in the superblock being marked dirty, so 234 * that sync() will call the filesystem's write_super callback if 235 * appropriate. 
236 */ 237 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 238 { 239 journal_t *journal; 240 241 if (sb->s_flags & MS_RDONLY) 242 return ERR_PTR(-EROFS); 243 244 vfs_check_frozen(sb, SB_FREEZE_WRITE); 245 /* Special case here: if the journal has aborted behind our 246 * backs (eg. EIO in the commit thread), then we still need to 247 * take the FS itself readonly cleanly. */ 248 journal = EXT4_SB(sb)->s_journal; 249 if (journal) { 250 if (is_journal_aborted(journal)) { 251 ext4_abort(sb, __func__, "Detected aborted journal"); 252 return ERR_PTR(-EROFS); 253 } 254 return jbd2_journal_start(journal, nblocks); 255 } 256 return ext4_get_nojournal(); 257 } 258 259 /* 260 * The only special thing we need to do here is to make sure that all 261 * jbd2_journal_stop calls result in the superblock being marked dirty, so 262 * that sync() will call the filesystem's write_super callback if 263 * appropriate. 264 */ 265 int __ext4_journal_stop(const char *where, handle_t *handle) 266 { 267 struct super_block *sb; 268 int err; 269 int rc; 270 271 if (!ext4_handle_valid(handle)) { 272 ext4_put_nojournal(handle); 273 return 0; 274 } 275 sb = handle->h_transaction->t_journal->j_private; 276 err = handle->h_err; 277 rc = jbd2_journal_stop(handle); 278 279 if (!err) 280 err = rc; 281 if (err) 282 __ext4_std_error(sb, where, err); 283 return err; 284 } 285 286 void ext4_journal_abort_handle(const char *caller, const char *err_fn, 287 struct buffer_head *bh, handle_t *handle, int err) 288 { 289 char nbuf[16]; 290 const char *errstr = ext4_decode_error(NULL, err, nbuf); 291 292 BUG_ON(!ext4_handle_valid(handle)); 293 294 if (bh) 295 BUFFER_TRACE(bh, "abort"); 296 297 if (!handle->h_err) 298 handle->h_err = err; 299 300 if (is_handle_aborted(handle)) 301 return; 302 303 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 304 caller, errstr, err_fn); 305 306 jbd2_journal_abort_handle(handle); 307 } 308 309 /* Deal with the reporting of failure conditions on a 
filesystem such as 310 * inconsistencies detected or read IO failures. 311 * 312 * On ext2, we can store the error state of the filesystem in the 313 * superblock. That is not possible on ext4, because we may have other 314 * write ordering constraints on the superblock which prevent us from 315 * writing it out straight away; and given that the journal is about to 316 * be aborted, we can't rely on the current, or future, transactions to 317 * write out the superblock safely. 318 * 319 * We'll just use the jbd2_journal_abort() error code to record an error in 320 * the journal instead. On recovery, the journal will complain about 321 * that error until we've noted it down and cleared it. 322 */ 323 324 static void ext4_handle_error(struct super_block *sb) 325 { 326 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 327 328 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 329 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 330 331 if (sb->s_flags & MS_RDONLY) 332 return; 333 334 if (!test_opt(sb, ERRORS_CONT)) { 335 journal_t *journal = EXT4_SB(sb)->s_journal; 336 337 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 338 if (journal) 339 jbd2_journal_abort(journal, -EIO); 340 } 341 if (test_opt(sb, ERRORS_RO)) { 342 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 343 sb->s_flags |= MS_RDONLY; 344 } 345 ext4_commit_super(sb, 1); 346 if (test_opt(sb, ERRORS_PANIC)) 347 panic("EXT4-fs (device %s): panic forced after error\n", 348 sb->s_id); 349 } 350 351 void __ext4_error(struct super_block *sb, const char *function, 352 const char *fmt, ...) 353 { 354 va_list args; 355 356 va_start(args, fmt); 357 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); 358 vprintk(fmt, args); 359 printk("\n"); 360 va_end(args); 361 362 ext4_handle_error(sb); 363 } 364 365 void ext4_error_inode(const char *function, struct inode *inode, 366 const char *fmt, ...) 
367 { 368 va_list args; 369 370 va_start(args, fmt); 371 printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", 372 inode->i_sb->s_id, function, inode->i_ino, current->comm); 373 vprintk(fmt, args); 374 printk("\n"); 375 va_end(args); 376 377 ext4_handle_error(inode->i_sb); 378 } 379 380 void ext4_error_file(const char *function, struct file *file, 381 const char *fmt, ...) 382 { 383 va_list args; 384 struct inode *inode = file->f_dentry->d_inode; 385 char pathname[80], *path; 386 387 va_start(args, fmt); 388 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 389 if (!path) 390 path = "(unknown)"; 391 printk(KERN_CRIT 392 "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", 393 inode->i_sb->s_id, function, inode->i_ino, current->comm, path); 394 vprintk(fmt, args); 395 printk("\n"); 396 va_end(args); 397 398 ext4_handle_error(inode->i_sb); 399 } 400 401 static const char *ext4_decode_error(struct super_block *sb, int errno, 402 char nbuf[16]) 403 { 404 char *errstr = NULL; 405 406 switch (errno) { 407 case -EIO: 408 errstr = "IO failure"; 409 break; 410 case -ENOMEM: 411 errstr = "Out of memory"; 412 break; 413 case -EROFS: 414 if (!sb || (EXT4_SB(sb)->s_journal && 415 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 416 errstr = "Journal has aborted"; 417 else 418 errstr = "Readonly filesystem"; 419 break; 420 default: 421 /* If the caller passed in an extra buffer for unknown 422 * errors, textualise them now. Else we just return 423 * NULL. */ 424 if (nbuf) { 425 /* Check for truncated error codes... */ 426 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 427 errstr = nbuf; 428 } 429 break; 430 } 431 432 return errstr; 433 } 434 435 /* __ext4_std_error decodes expected errors from journaling functions 436 * automatically and invokes the appropriate error response. 
*/

void __ext4_std_error(struct super_block *sb, const char *function, int errno)
{
	char nbuf[16];
	const char *errstr;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL &&
	    (sb->s_flags & MS_RDONLY))
		return;

	/* nbuf is supplied, so unknown codes decode to "error N" */
	errstr = ext4_decode_error(sb, errno, nbuf);
	printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
	       sb->s_id, function, errstr);

	ext4_handle_error(sb);
}

/*
 * ext4_abort is a much stronger failure handler than ext4_error. The
 * abort function may be used to deal with unrecoverable failures such
 * as journal IO errors or ENOMEM at a critical moment in log management.
 *
 * We unconditionally force the filesystem into an ABORT|READONLY state,
 * unless the error response on the fs has been set to panic in which
 * case we take the easy way out and panic immediately.
 */

void ext4_abort(struct super_block *sb, const char *function,
		const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (test_opt(sb, ERRORS_PANIC))
		panic("EXT4-fs panic from previous error\n");

	/* Already read-only: nothing further to force. */
	if (sb->s_flags & MS_RDONLY)
		return;

	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	sb->s_flags |= MS_RDONLY;
	EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
	if (EXT4_SB(sb)->s_journal)
		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}

/*
 * Print a printf-style message for @sb.  @prefix is emitted first and is
 * expected to be a printk log-level string (callers in this file pass
 * KERN_* constants); the device id from sb->s_id follows.
 */
void ext4_msg (struct super_block * sb, const char *prefix,
		   const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);
}

/*
 * Log a printf-style warning for @sb, tagged with the reporting
 * @function.  Unlike the error reporters, no error policy is applied.
 */
void __ext4_warning(struct super_block *sb, const char *function,
		    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
	       sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);
}

/*
 * Error reporter for callers that hold the lock of block group @grp.
 *
 * With errors=continue the error flags are set and the superblock is
 * committed without waiting, all while the group lock stays held.
 * Otherwise the group lock is dropped around ext4_handle_error() and
 * re-taken before returning.
 */
void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
			   const char *function, const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (test_opt(sb, ERRORS_CONT)) {
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb, 0);
		return;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.
	 *
	 * Note that this function returns void, so callers cannot use
	 * it to distinguish between the ERRORS_CONT and ERRORS_RO
	 * cases; a caller that wants to return more aggressively, with
	 * a more appropriate error code, has to check the mount state
	 * itself.
	 */
	ext4_lock_group(sb, grp);
	return;
}

/*
 * Upgrade an old-revision superblock to EXT4_DYNAMIC_REV in memory,
 * filling in the first-inode and inode-size fields with the old-revision
 * defaults.  No-op when the revision is already newer than
 * EXT4_GOOD_OLD_REV.
 */
void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

/*
 * Open the external journal device
 *
 * Returns the opened block device, or NULL (after logging the failure)
 * when open_by_devnum() returns an error pointer.
 */
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
			__bdevname(dev, b), PTR_ERR(bdev));
	return NULL;
}

/*
 * Release the journal device
 */
static int ext4_blkdev_put(struct block_device *bdev)
{
	bd_release(bdev);
	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
}

/*
 * Release sbi->journal_bdev, if set, and clear the pointer.
 * Returns the result of ext4_blkdev_put(), or -ENODEV when there was no
 * journal device.
 */
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
	struct block_device *bdev;
	int ret = -ENODEV;

	bdev = sbi->journal_bdev;
	if (bdev) {
		ret = ext4_blkdev_put(bdev);
		sbi->journal_bdev = NULL;
	}
	return ret;
}

/* Map an i_orphan list node back to the VFS inode that embeds it. */
static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}

static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	/* On-disk head of the orphan list, as recorded in the superblock */
	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	/* In-memory orphan list: one line per inode still linked on it */
	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR " "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

/*
 * Tear down the ext4-private state of a superblock: quota, the unwritten
 * extent workqueue, the journal, allocator and xattr state, the
 * proc/sysfs entries, group descriptor buffers, per-cpu counters and
 * finally the ext4_sb_info itself.  The statement order below is the
 * teardown order and should not be rearranged casually.
 */
static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int i, err;

	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

	/* Drain pending work before the queue is destroyed */
	flush_workqueue(sbi->dio_unwritten_wq);
	destroy_workqueue(sbi->dio_unwritten_wq);

	lock_super(sb);
	lock_kernel();
	if (sb->s_dirt)
		ext4_commit_super(sb, 1);

	if (sbi->s_journal) {
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if (err < 0)
			ext4_abort(sb, __func__,
				   "Couldn't clean up the journal");
	}

	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);
	ext4_xattr_put_super(sb);

	/*
	 * On a writable filesystem, clear the RECOVER feature flag and
	 * write the saved mount state back to the on-disk superblock.
	 */
	if (!(sb->s_flags & MS_RDONLY)) {
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
		ext4_commit_super(sb, 1);
	}
	if (sbi->s_proc) {
		remove_proc_entry(sb->s_id, ext4_proc_root);
	}
	kobject_del(&sbi->s_kobj);

	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
	/* s_flex_groups may have come from either allocator */
	if (is_vmalloc_addr(sbi->s_flex_groups))
		vfree(sbi->s_flex_groups);
	else
		kfree(sbi->s_flex_groups);
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

	invalidate_bdev(sb->s_bdev);
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * be hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
		invalidate_bdev(sbi->journal_bdev);
		ext4_blkdev_remove(sbi);
	}
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	unlock_kernel();
	unlock_super(sb);
	kobject_put(&sbi->s_kobj);
	/* sbi must stay allocated until the kobject signals completion */
	wait_for_completion(&sbi->s_kobj_unregister);
	kfree(sbi->s_blockgroup_lock);
	kfree(sbi);
}

/* Slab cache backing struct ext4_inode_info; see init_inodecache() */
static struct kmem_cache *ext4_inode_cachep;

/*
 * Called inside transaction, so use GFP_NOFS
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
	struct ext4_inode_info *ei;

	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	ei->vfs_inode.i_version = 1;
	ei->vfs_inode.i_data.writeback_index = 0;
	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	spin_lock_init(&ei->i_prealloc_lock);
	/*
	 * Note: We can be called before EXT4_SB(sb)->s_journal is set,
	 * therefore it can be null here.
Don't check it, just initialize 752 * jinode. 753 */ 754 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); 755 ei->i_reserved_data_blocks = 0; 756 ei->i_reserved_meta_blocks = 0; 757 ei->i_allocated_meta_blocks = 0; 758 ei->i_da_metadata_calc_len = 0; 759 ei->i_delalloc_reserved_flag = 0; 760 spin_lock_init(&(ei->i_block_reservation_lock)); 761 #ifdef CONFIG_QUOTA 762 ei->i_reserved_quota = 0; 763 #endif 764 INIT_LIST_HEAD(&ei->i_completed_io_list); 765 spin_lock_init(&ei->i_completed_io_lock); 766 ei->cur_aio_dio = NULL; 767 ei->i_sync_tid = 0; 768 ei->i_datasync_tid = 0; 769 770 return &ei->vfs_inode; 771 } 772 773 static void ext4_destroy_inode(struct inode *inode) 774 { 775 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 776 ext4_msg(inode->i_sb, KERN_ERR, 777 "Inode %lu (%p): orphan list check failed!", 778 inode->i_ino, EXT4_I(inode)); 779 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 780 EXT4_I(inode), sizeof(struct ext4_inode_info), 781 true); 782 dump_stack(); 783 } 784 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 785 } 786 787 static void init_once(void *foo) 788 { 789 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 790 791 INIT_LIST_HEAD(&ei->i_orphan); 792 #ifdef CONFIG_EXT4_FS_XATTR 793 init_rwsem(&ei->xattr_sem); 794 #endif 795 init_rwsem(&ei->i_data_sem); 796 inode_init_once(&ei->vfs_inode); 797 } 798 799 static int init_inodecache(void) 800 { 801 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 802 sizeof(struct ext4_inode_info), 803 0, (SLAB_RECLAIM_ACCOUNT| 804 SLAB_MEM_SPREAD), 805 init_once); 806 if (ext4_inode_cachep == NULL) 807 return -ENOMEM; 808 return 0; 809 } 810 811 static void destroy_inodecache(void) 812 { 813 kmem_cache_destroy(ext4_inode_cachep); 814 } 815 816 static void ext4_clear_inode(struct inode *inode) 817 { 818 dquot_drop(inode); 819 ext4_discard_preallocations(inode); 820 if (EXT4_JOURNAL(inode)) 821 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 822 
&EXT4_I(inode)->jinode); 823 } 824 825 static inline void ext4_show_quota_options(struct seq_file *seq, 826 struct super_block *sb) 827 { 828 #if defined(CONFIG_QUOTA) 829 struct ext4_sb_info *sbi = EXT4_SB(sb); 830 831 if (sbi->s_jquota_fmt) { 832 char *fmtname = ""; 833 834 switch (sbi->s_jquota_fmt) { 835 case QFMT_VFS_OLD: 836 fmtname = "vfsold"; 837 break; 838 case QFMT_VFS_V0: 839 fmtname = "vfsv0"; 840 break; 841 case QFMT_VFS_V1: 842 fmtname = "vfsv1"; 843 break; 844 } 845 seq_printf(seq, ",jqfmt=%s", fmtname); 846 } 847 848 if (sbi->s_qf_names[USRQUOTA]) 849 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 850 851 if (sbi->s_qf_names[GRPQUOTA]) 852 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 853 854 if (test_opt(sb, USRQUOTA)) 855 seq_puts(seq, ",usrquota"); 856 857 if (test_opt(sb, GRPQUOTA)) 858 seq_puts(seq, ",grpquota"); 859 #endif 860 } 861 862 /* 863 * Show an option if 864 * - it's set to a non-default value OR 865 * - if the per-sb default is different from the global default 866 */ 867 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 868 { 869 int def_errors; 870 unsigned long def_mount_opts; 871 struct super_block *sb = vfs->mnt_sb; 872 struct ext4_sb_info *sbi = EXT4_SB(sb); 873 struct ext4_super_block *es = sbi->s_es; 874 875 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 876 def_errors = le16_to_cpu(es->s_errors); 877 878 if (sbi->s_sb_block != 1) 879 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 880 if (test_opt(sb, MINIX_DF)) 881 seq_puts(seq, ",minixdf"); 882 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 883 seq_puts(seq, ",grpid"); 884 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 885 seq_puts(seq, ",nogrpid"); 886 if (sbi->s_resuid != EXT4_DEF_RESUID || 887 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 888 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 889 } 890 if (sbi->s_resgid != EXT4_DEF_RESGID || 891 
le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 892 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 893 } 894 if (test_opt(sb, ERRORS_RO)) { 895 if (def_errors == EXT4_ERRORS_PANIC || 896 def_errors == EXT4_ERRORS_CONTINUE) { 897 seq_puts(seq, ",errors=remount-ro"); 898 } 899 } 900 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 901 seq_puts(seq, ",errors=continue"); 902 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 903 seq_puts(seq, ",errors=panic"); 904 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 905 seq_puts(seq, ",nouid32"); 906 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 907 seq_puts(seq, ",debug"); 908 if (test_opt(sb, OLDALLOC)) 909 seq_puts(seq, ",oldalloc"); 910 #ifdef CONFIG_EXT4_FS_XATTR 911 if (test_opt(sb, XATTR_USER) && 912 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 913 seq_puts(seq, ",user_xattr"); 914 if (!test_opt(sb, XATTR_USER) && 915 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 916 seq_puts(seq, ",nouser_xattr"); 917 } 918 #endif 919 #ifdef CONFIG_EXT4_FS_POSIX_ACL 920 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 921 seq_puts(seq, ",acl"); 922 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 923 seq_puts(seq, ",noacl"); 924 #endif 925 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 926 seq_printf(seq, ",commit=%u", 927 (unsigned) (sbi->s_commit_interval / HZ)); 928 } 929 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 930 seq_printf(seq, ",min_batch_time=%u", 931 (unsigned) sbi->s_min_batch_time); 932 } 933 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 934 seq_printf(seq, ",max_batch_time=%u", 935 (unsigned) sbi->s_min_batch_time); 936 } 937 938 /* 939 * We're changing the default of barrier mount option, so 940 * let's always display its mount state so it's clear what its 941 * status is. 942 */ 943 seq_puts(seq, ",barrier="); 944 seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); 945 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 946 seq_puts(seq, ",journal_async_commit"); 947 else if (test_opt(sb, JOURNAL_CHECKSUM)) 948 seq_puts(seq, ",journal_checksum"); 949 if (test_opt(sb, NOBH)) 950 seq_puts(seq, ",nobh"); 951 if (test_opt(sb, I_VERSION)) 952 seq_puts(seq, ",i_version"); 953 if (!test_opt(sb, DELALLOC)) 954 seq_puts(seq, ",nodelalloc"); 955 956 957 if (sbi->s_stripe) 958 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 959 /* 960 * journal mode get enabled in different ways 961 * So just print the value even if we didn't specify it 962 */ 963 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 964 seq_puts(seq, ",data=journal"); 965 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 966 seq_puts(seq, ",data=ordered"); 967 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 968 seq_puts(seq, ",data=writeback"); 969 970 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 971 seq_printf(seq, ",inode_readahead_blks=%u", 972 sbi->s_inode_readahead_blks); 973 974 if (test_opt(sb, DATA_ERR_ABORT)) 975 seq_puts(seq, ",data_err=abort"); 976 977 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 978 seq_puts(seq, ",noauto_da_alloc"); 979 980 if (test_opt(sb, DISCARD)) 981 seq_puts(seq, ",discard"); 982 983 if (test_opt(sb, NOLOAD)) 984 seq_puts(seq, ",norecovery"); 985 986 if (test_opt(sb, DIOREAD_NOLOCK)) 987 seq_puts(seq, ",dioread_nolock"); 988 989 ext4_show_quota_options(seq, sb); 990 991 return 0; 992 } 993 994 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 995 u64 ino, u32 generation) 996 { 997 struct inode *inode; 998 999 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 1000 return ERR_PTR(-ESTALE); 1001 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 1002 return ERR_PTR(-ESTALE); 1003 1004 /* iget isn't really right if the inode is currently unallocated!! 1005 * 1006 * ext4_read_inode will return a bad_inode if the inode had been 1007 * deleted, so we should be safe. 
1008 * 1009 * Currently we don't know the generation for parent directory, so 1010 * a generation of 0 means "accept any" 1011 */ 1012 inode = ext4_iget(sb, ino); 1013 if (IS_ERR(inode)) 1014 return ERR_CAST(inode); 1015 if (generation && inode->i_generation != generation) { 1016 iput(inode); 1017 return ERR_PTR(-ESTALE); 1018 } 1019 1020 return inode; 1021 } 1022 1023 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1024 int fh_len, int fh_type) 1025 { 1026 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1027 ext4_nfs_get_inode); 1028 } 1029 1030 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1031 int fh_len, int fh_type) 1032 { 1033 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1034 ext4_nfs_get_inode); 1035 } 1036 1037 /* 1038 * Try to release metadata pages (indirect blocks, directories) which are 1039 * mapped via the block device. Since these pages could have journal heads 1040 * which would prevent try_to_free_buffers() from freeing them, we must use 1041 * jbd2 layer's try_to_free_buffers() function to release them. 1042 */ 1043 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1044 gfp_t wait) 1045 { 1046 journal_t *journal = EXT4_SB(sb)->s_journal; 1047 1048 WARN_ON(PageChecked(page)); 1049 if (!page_has_buffers(page)) 1050 return 0; 1051 if (journal) 1052 return jbd2_journal_try_to_free_buffers(journal, page, 1053 wait & ~__GFP_WAIT); 1054 return try_to_free_buffers(page); 1055 } 1056 1057 #ifdef CONFIG_QUOTA 1058 #define QTYPE2NAME(t) ((t) == USRQUOTA ? 
"user" : "group") 1059 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1060 1061 static int ext4_write_dquot(struct dquot *dquot); 1062 static int ext4_acquire_dquot(struct dquot *dquot); 1063 static int ext4_release_dquot(struct dquot *dquot); 1064 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1065 static int ext4_write_info(struct super_block *sb, int type); 1066 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1067 char *path); 1068 static int ext4_quota_on_mount(struct super_block *sb, int type); 1069 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1070 size_t len, loff_t off); 1071 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1072 const char *data, size_t len, loff_t off); 1073 1074 static const struct dquot_operations ext4_quota_operations = { 1075 #ifdef CONFIG_QUOTA 1076 .get_reserved_space = ext4_get_reserved_space, 1077 #endif 1078 .write_dquot = ext4_write_dquot, 1079 .acquire_dquot = ext4_acquire_dquot, 1080 .release_dquot = ext4_release_dquot, 1081 .mark_dirty = ext4_mark_dquot_dirty, 1082 .write_info = ext4_write_info, 1083 .alloc_dquot = dquot_alloc, 1084 .destroy_dquot = dquot_destroy, 1085 }; 1086 1087 static const struct quotactl_ops ext4_qctl_operations = { 1088 .quota_on = ext4_quota_on, 1089 .quota_off = dquot_quota_off, 1090 .quota_sync = dquot_quota_sync, 1091 .get_info = dquot_get_dqinfo, 1092 .set_info = dquot_set_dqinfo, 1093 .get_dqblk = dquot_get_dqblk, 1094 .set_dqblk = dquot_set_dqblk 1095 }; 1096 #endif 1097 1098 static const struct super_operations ext4_sops = { 1099 .alloc_inode = ext4_alloc_inode, 1100 .destroy_inode = ext4_destroy_inode, 1101 .write_inode = ext4_write_inode, 1102 .dirty_inode = ext4_dirty_inode, 1103 .delete_inode = ext4_delete_inode, 1104 .put_super = ext4_put_super, 1105 .sync_fs = ext4_sync_fs, 1106 .freeze_fs = ext4_freeze, 1107 .unfreeze_fs = ext4_unfreeze, 1108 .statfs = ext4_statfs, 1109 
.remount_fs = ext4_remount, 1110 .clear_inode = ext4_clear_inode, 1111 .show_options = ext4_show_options, 1112 #ifdef CONFIG_QUOTA 1113 .quota_read = ext4_quota_read, 1114 .quota_write = ext4_quota_write, 1115 #endif 1116 .bdev_try_to_free_page = bdev_try_to_free_page, 1117 }; 1118 1119 static const struct super_operations ext4_nojournal_sops = { 1120 .alloc_inode = ext4_alloc_inode, 1121 .destroy_inode = ext4_destroy_inode, 1122 .write_inode = ext4_write_inode, 1123 .dirty_inode = ext4_dirty_inode, 1124 .delete_inode = ext4_delete_inode, 1125 .write_super = ext4_write_super, 1126 .put_super = ext4_put_super, 1127 .statfs = ext4_statfs, 1128 .remount_fs = ext4_remount, 1129 .clear_inode = ext4_clear_inode, 1130 .show_options = ext4_show_options, 1131 #ifdef CONFIG_QUOTA 1132 .quota_read = ext4_quota_read, 1133 .quota_write = ext4_quota_write, 1134 #endif 1135 .bdev_try_to_free_page = bdev_try_to_free_page, 1136 }; 1137 1138 static const struct export_operations ext4_export_ops = { 1139 .fh_to_dentry = ext4_fh_to_dentry, 1140 .fh_to_parent = ext4_fh_to_parent, 1141 .get_parent = ext4_get_parent, 1142 }; 1143 1144 enum { 1145 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1146 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1147 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1148 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1149 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1150 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1151 Opt_journal_update, Opt_journal_dev, 1152 Opt_journal_checksum, Opt_journal_async_commit, 1153 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1154 Opt_data_err_abort, Opt_data_err_ignore, 1155 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1156 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1157 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1158 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1159 
Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1160 Opt_block_validity, Opt_noblock_validity, 1161 Opt_inode_readahead_blks, Opt_journal_ioprio, 1162 Opt_dioread_nolock, Opt_dioread_lock, 1163 Opt_discard, Opt_nodiscard, 1164 }; 1165 1166 static const match_table_t tokens = { 1167 {Opt_bsd_df, "bsddf"}, 1168 {Opt_minix_df, "minixdf"}, 1169 {Opt_grpid, "grpid"}, 1170 {Opt_grpid, "bsdgroups"}, 1171 {Opt_nogrpid, "nogrpid"}, 1172 {Opt_nogrpid, "sysvgroups"}, 1173 {Opt_resgid, "resgid=%u"}, 1174 {Opt_resuid, "resuid=%u"}, 1175 {Opt_sb, "sb=%u"}, 1176 {Opt_err_cont, "errors=continue"}, 1177 {Opt_err_panic, "errors=panic"}, 1178 {Opt_err_ro, "errors=remount-ro"}, 1179 {Opt_nouid32, "nouid32"}, 1180 {Opt_debug, "debug"}, 1181 {Opt_oldalloc, "oldalloc"}, 1182 {Opt_orlov, "orlov"}, 1183 {Opt_user_xattr, "user_xattr"}, 1184 {Opt_nouser_xattr, "nouser_xattr"}, 1185 {Opt_acl, "acl"}, 1186 {Opt_noacl, "noacl"}, 1187 {Opt_noload, "noload"}, 1188 {Opt_noload, "norecovery"}, 1189 {Opt_nobh, "nobh"}, 1190 {Opt_bh, "bh"}, 1191 {Opt_commit, "commit=%u"}, 1192 {Opt_min_batch_time, "min_batch_time=%u"}, 1193 {Opt_max_batch_time, "max_batch_time=%u"}, 1194 {Opt_journal_update, "journal=update"}, 1195 {Opt_journal_dev, "journal_dev=%u"}, 1196 {Opt_journal_checksum, "journal_checksum"}, 1197 {Opt_journal_async_commit, "journal_async_commit"}, 1198 {Opt_abort, "abort"}, 1199 {Opt_data_journal, "data=journal"}, 1200 {Opt_data_ordered, "data=ordered"}, 1201 {Opt_data_writeback, "data=writeback"}, 1202 {Opt_data_err_abort, "data_err=abort"}, 1203 {Opt_data_err_ignore, "data_err=ignore"}, 1204 {Opt_offusrjquota, "usrjquota="}, 1205 {Opt_usrjquota, "usrjquota=%s"}, 1206 {Opt_offgrpjquota, "grpjquota="}, 1207 {Opt_grpjquota, "grpjquota=%s"}, 1208 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1209 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1210 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1211 {Opt_grpquota, "grpquota"}, 1212 {Opt_noquota, "noquota"}, 1213 {Opt_quota, "quota"}, 1214 {Opt_usrquota, "usrquota"}, 1215 {Opt_barrier, 
"barrier=%u"}, 1216 {Opt_barrier, "barrier"}, 1217 {Opt_nobarrier, "nobarrier"}, 1218 {Opt_i_version, "i_version"}, 1219 {Opt_stripe, "stripe=%u"}, 1220 {Opt_resize, "resize"}, 1221 {Opt_delalloc, "delalloc"}, 1222 {Opt_nodelalloc, "nodelalloc"}, 1223 {Opt_block_validity, "block_validity"}, 1224 {Opt_noblock_validity, "noblock_validity"}, 1225 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1226 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1227 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1228 {Opt_auto_da_alloc, "auto_da_alloc"}, 1229 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1230 {Opt_dioread_nolock, "dioread_nolock"}, 1231 {Opt_dioread_lock, "dioread_lock"}, 1232 {Opt_discard, "discard"}, 1233 {Opt_nodiscard, "nodiscard"}, 1234 {Opt_err, NULL}, 1235 }; 1236 1237 static ext4_fsblk_t get_sb_block(void **data) 1238 { 1239 ext4_fsblk_t sb_block; 1240 char *options = (char *) *data; 1241 1242 if (!options || strncmp(options, "sb=", 3) != 0) 1243 return 1; /* Default location */ 1244 1245 options += 3; 1246 /* TODO: use simple_strtoll with >32bit ext4 */ 1247 sb_block = simple_strtoul(options, &options, 0); 1248 if (*options && *options != ',') { 1249 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1250 (char *) *data); 1251 return 1; 1252 } 1253 if (*options == ',') 1254 options++; 1255 *data = (void *) options; 1256 1257 return sb_block; 1258 } 1259 1260 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1261 static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1262 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1263 1264 #ifdef CONFIG_QUOTA 1265 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1266 { 1267 struct ext4_sb_info *sbi = EXT4_SB(sb); 1268 char *qname; 1269 1270 if (sb_any_quota_loaded(sb) && 1271 !sbi->s_qf_names[qtype]) { 1272 ext4_msg(sb, KERN_ERR, 1273 "Cannot change journaled " 1274 "quota options when quota turned on"); 1275 return 0; 1276 } 
1277 qname = match_strdup(args); 1278 if (!qname) { 1279 ext4_msg(sb, KERN_ERR, 1280 "Not enough memory for storing quotafile name"); 1281 return 0; 1282 } 1283 if (sbi->s_qf_names[qtype] && 1284 strcmp(sbi->s_qf_names[qtype], qname)) { 1285 ext4_msg(sb, KERN_ERR, 1286 "%s quota file already specified", QTYPE2NAME(qtype)); 1287 kfree(qname); 1288 return 0; 1289 } 1290 sbi->s_qf_names[qtype] = qname; 1291 if (strchr(sbi->s_qf_names[qtype], '/')) { 1292 ext4_msg(sb, KERN_ERR, 1293 "quotafile must be on filesystem root"); 1294 kfree(sbi->s_qf_names[qtype]); 1295 sbi->s_qf_names[qtype] = NULL; 1296 return 0; 1297 } 1298 set_opt(sbi->s_mount_opt, QUOTA); 1299 return 1; 1300 } 1301 1302 static int clear_qf_name(struct super_block *sb, int qtype) 1303 { 1304 1305 struct ext4_sb_info *sbi = EXT4_SB(sb); 1306 1307 if (sb_any_quota_loaded(sb) && 1308 sbi->s_qf_names[qtype]) { 1309 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1310 " when quota turned on"); 1311 return 0; 1312 } 1313 /* 1314 * The space will be released later when all options are confirmed 1315 * to be correct 1316 */ 1317 sbi->s_qf_names[qtype] = NULL; 1318 return 1; 1319 } 1320 #endif 1321 1322 static int parse_options(char *options, struct super_block *sb, 1323 unsigned long *journal_devnum, 1324 unsigned int *journal_ioprio, 1325 ext4_fsblk_t *n_blocks_count, int is_remount) 1326 { 1327 struct ext4_sb_info *sbi = EXT4_SB(sb); 1328 char *p; 1329 substring_t args[MAX_OPT_ARGS]; 1330 int data_opt = 0; 1331 int option; 1332 #ifdef CONFIG_QUOTA 1333 int qfmt; 1334 #endif 1335 1336 if (!options) 1337 return 1; 1338 1339 while ((p = strsep(&options, ",")) != NULL) { 1340 int token; 1341 if (!*p) 1342 continue; 1343 1344 /* 1345 * Initialize args struct so we know whether arg was 1346 * found; some options take optional arguments. 
1347 */ 1348 args[0].to = args[0].from = 0; 1349 token = match_token(p, tokens, args); 1350 switch (token) { 1351 case Opt_bsd_df: 1352 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1353 clear_opt(sbi->s_mount_opt, MINIX_DF); 1354 break; 1355 case Opt_minix_df: 1356 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1357 set_opt(sbi->s_mount_opt, MINIX_DF); 1358 1359 break; 1360 case Opt_grpid: 1361 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1362 set_opt(sbi->s_mount_opt, GRPID); 1363 1364 break; 1365 case Opt_nogrpid: 1366 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1367 clear_opt(sbi->s_mount_opt, GRPID); 1368 1369 break; 1370 case Opt_resuid: 1371 if (match_int(&args[0], &option)) 1372 return 0; 1373 sbi->s_resuid = option; 1374 break; 1375 case Opt_resgid: 1376 if (match_int(&args[0], &option)) 1377 return 0; 1378 sbi->s_resgid = option; 1379 break; 1380 case Opt_sb: 1381 /* handled by get_sb_block() instead of here */ 1382 /* *sb_block = match_int(&args[0]); */ 1383 break; 1384 case Opt_err_panic: 1385 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1386 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1387 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1388 break; 1389 case Opt_err_ro: 1390 clear_opt(sbi->s_mount_opt, ERRORS_CONT); 1391 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1392 set_opt(sbi->s_mount_opt, ERRORS_RO); 1393 break; 1394 case Opt_err_cont: 1395 clear_opt(sbi->s_mount_opt, ERRORS_RO); 1396 clear_opt(sbi->s_mount_opt, ERRORS_PANIC); 1397 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1398 break; 1399 case Opt_nouid32: 1400 set_opt(sbi->s_mount_opt, NO_UID32); 1401 break; 1402 case Opt_debug: 1403 set_opt(sbi->s_mount_opt, DEBUG); 1404 break; 1405 case Opt_oldalloc: 1406 set_opt(sbi->s_mount_opt, OLDALLOC); 1407 break; 1408 case Opt_orlov: 1409 clear_opt(sbi->s_mount_opt, OLDALLOC); 1410 break; 1411 #ifdef CONFIG_EXT4_FS_XATTR 1412 case Opt_user_xattr: 1413 set_opt(sbi->s_mount_opt, XATTR_USER); 1414 break; 1415 case 
Opt_nouser_xattr: 1416 clear_opt(sbi->s_mount_opt, XATTR_USER); 1417 break; 1418 #else 1419 case Opt_user_xattr: 1420 case Opt_nouser_xattr: 1421 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1422 break; 1423 #endif 1424 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1425 case Opt_acl: 1426 set_opt(sbi->s_mount_opt, POSIX_ACL); 1427 break; 1428 case Opt_noacl: 1429 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1430 break; 1431 #else 1432 case Opt_acl: 1433 case Opt_noacl: 1434 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1435 break; 1436 #endif 1437 case Opt_journal_update: 1438 /* @@@ FIXME */ 1439 /* Eventually we will want to be able to create 1440 a journal file here. For now, only allow the 1441 user to specify an existing inode to be the 1442 journal file. */ 1443 if (is_remount) { 1444 ext4_msg(sb, KERN_ERR, 1445 "Cannot specify journal on remount"); 1446 return 0; 1447 } 1448 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); 1449 break; 1450 case Opt_journal_dev: 1451 if (is_remount) { 1452 ext4_msg(sb, KERN_ERR, 1453 "Cannot specify journal on remount"); 1454 return 0; 1455 } 1456 if (match_int(&args[0], &option)) 1457 return 0; 1458 *journal_devnum = option; 1459 break; 1460 case Opt_journal_checksum: 1461 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1462 break; 1463 case Opt_journal_async_commit: 1464 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1465 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1466 break; 1467 case Opt_noload: 1468 set_opt(sbi->s_mount_opt, NOLOAD); 1469 break; 1470 case Opt_commit: 1471 if (match_int(&args[0], &option)) 1472 return 0; 1473 if (option < 0) 1474 return 0; 1475 if (option == 0) 1476 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1477 sbi->s_commit_interval = HZ * option; 1478 break; 1479 case Opt_max_batch_time: 1480 if (match_int(&args[0], &option)) 1481 return 0; 1482 if (option < 0) 1483 return 0; 1484 if (option == 0) 1485 option = EXT4_DEF_MAX_BATCH_TIME; 1486 sbi->s_max_batch_time = option; 1487 break; 1488 case 
Opt_min_batch_time: 1489 if (match_int(&args[0], &option)) 1490 return 0; 1491 if (option < 0) 1492 return 0; 1493 sbi->s_min_batch_time = option; 1494 break; 1495 case Opt_data_journal: 1496 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1497 goto datacheck; 1498 case Opt_data_ordered: 1499 data_opt = EXT4_MOUNT_ORDERED_DATA; 1500 goto datacheck; 1501 case Opt_data_writeback: 1502 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1503 datacheck: 1504 if (is_remount) { 1505 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1506 ext4_msg(sb, KERN_ERR, 1507 "Cannot change data mode on remount"); 1508 return 0; 1509 } 1510 } else { 1511 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1512 sbi->s_mount_opt |= data_opt; 1513 } 1514 break; 1515 case Opt_data_err_abort: 1516 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1517 break; 1518 case Opt_data_err_ignore: 1519 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1520 break; 1521 #ifdef CONFIG_QUOTA 1522 case Opt_usrjquota: 1523 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1524 return 0; 1525 break; 1526 case Opt_grpjquota: 1527 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1528 return 0; 1529 break; 1530 case Opt_offusrjquota: 1531 if (!clear_qf_name(sb, USRQUOTA)) 1532 return 0; 1533 break; 1534 case Opt_offgrpjquota: 1535 if (!clear_qf_name(sb, GRPQUOTA)) 1536 return 0; 1537 break; 1538 1539 case Opt_jqfmt_vfsold: 1540 qfmt = QFMT_VFS_OLD; 1541 goto set_qf_format; 1542 case Opt_jqfmt_vfsv0: 1543 qfmt = QFMT_VFS_V0; 1544 goto set_qf_format; 1545 case Opt_jqfmt_vfsv1: 1546 qfmt = QFMT_VFS_V1; 1547 set_qf_format: 1548 if (sb_any_quota_loaded(sb) && 1549 sbi->s_jquota_fmt != qfmt) { 1550 ext4_msg(sb, KERN_ERR, "Cannot change " 1551 "journaled quota options when " 1552 "quota turned on"); 1553 return 0; 1554 } 1555 sbi->s_jquota_fmt = qfmt; 1556 break; 1557 case Opt_quota: 1558 case Opt_usrquota: 1559 set_opt(sbi->s_mount_opt, QUOTA); 1560 set_opt(sbi->s_mount_opt, USRQUOTA); 1561 break; 1562 case Opt_grpquota: 1563 set_opt(sbi->s_mount_opt, QUOTA); 1564 
set_opt(sbi->s_mount_opt, GRPQUOTA); 1565 break; 1566 case Opt_noquota: 1567 if (sb_any_quota_loaded(sb)) { 1568 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1569 "options when quota turned on"); 1570 return 0; 1571 } 1572 clear_opt(sbi->s_mount_opt, QUOTA); 1573 clear_opt(sbi->s_mount_opt, USRQUOTA); 1574 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1575 break; 1576 #else 1577 case Opt_quota: 1578 case Opt_usrquota: 1579 case Opt_grpquota: 1580 ext4_msg(sb, KERN_ERR, 1581 "quota options not supported"); 1582 break; 1583 case Opt_usrjquota: 1584 case Opt_grpjquota: 1585 case Opt_offusrjquota: 1586 case Opt_offgrpjquota: 1587 case Opt_jqfmt_vfsold: 1588 case Opt_jqfmt_vfsv0: 1589 case Opt_jqfmt_vfsv1: 1590 ext4_msg(sb, KERN_ERR, 1591 "journaled quota options not supported"); 1592 break; 1593 case Opt_noquota: 1594 break; 1595 #endif 1596 case Opt_abort: 1597 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1598 break; 1599 case Opt_nobarrier: 1600 clear_opt(sbi->s_mount_opt, BARRIER); 1601 break; 1602 case Opt_barrier: 1603 if (args[0].from) { 1604 if (match_int(&args[0], &option)) 1605 return 0; 1606 } else 1607 option = 1; /* No argument, default to 1 */ 1608 if (option) 1609 set_opt(sbi->s_mount_opt, BARRIER); 1610 else 1611 clear_opt(sbi->s_mount_opt, BARRIER); 1612 break; 1613 case Opt_ignore: 1614 break; 1615 case Opt_resize: 1616 if (!is_remount) { 1617 ext4_msg(sb, KERN_ERR, 1618 "resize option only available " 1619 "for remount"); 1620 return 0; 1621 } 1622 if (match_int(&args[0], &option) != 0) 1623 return 0; 1624 *n_blocks_count = option; 1625 break; 1626 case Opt_nobh: 1627 set_opt(sbi->s_mount_opt, NOBH); 1628 break; 1629 case Opt_bh: 1630 clear_opt(sbi->s_mount_opt, NOBH); 1631 break; 1632 case Opt_i_version: 1633 set_opt(sbi->s_mount_opt, I_VERSION); 1634 sb->s_flags |= MS_I_VERSION; 1635 break; 1636 case Opt_nodelalloc: 1637 clear_opt(sbi->s_mount_opt, DELALLOC); 1638 break; 1639 case Opt_stripe: 1640 if (match_int(&args[0], &option)) 1641 return 0; 1642 if 
(option < 0) 1643 return 0; 1644 sbi->s_stripe = option; 1645 break; 1646 case Opt_delalloc: 1647 set_opt(sbi->s_mount_opt, DELALLOC); 1648 break; 1649 case Opt_block_validity: 1650 set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1651 break; 1652 case Opt_noblock_validity: 1653 clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); 1654 break; 1655 case Opt_inode_readahead_blks: 1656 if (match_int(&args[0], &option)) 1657 return 0; 1658 if (option < 0 || option > (1 << 30)) 1659 return 0; 1660 if (!is_power_of_2(option)) { 1661 ext4_msg(sb, KERN_ERR, 1662 "EXT4-fs: inode_readahead_blks" 1663 " must be a power of 2"); 1664 return 0; 1665 } 1666 sbi->s_inode_readahead_blks = option; 1667 break; 1668 case Opt_journal_ioprio: 1669 if (match_int(&args[0], &option)) 1670 return 0; 1671 if (option < 0 || option > 7) 1672 break; 1673 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1674 option); 1675 break; 1676 case Opt_noauto_da_alloc: 1677 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1678 break; 1679 case Opt_auto_da_alloc: 1680 if (args[0].from) { 1681 if (match_int(&args[0], &option)) 1682 return 0; 1683 } else 1684 option = 1; /* No argument, default to 1 */ 1685 if (option) 1686 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); 1687 else 1688 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1689 break; 1690 case Opt_discard: 1691 set_opt(sbi->s_mount_opt, DISCARD); 1692 break; 1693 case Opt_nodiscard: 1694 clear_opt(sbi->s_mount_opt, DISCARD); 1695 break; 1696 case Opt_dioread_nolock: 1697 set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1698 break; 1699 case Opt_dioread_lock: 1700 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 1701 break; 1702 default: 1703 ext4_msg(sb, KERN_ERR, 1704 "Unrecognized mount option \"%s\" " 1705 "or missing value", p); 1706 return 0; 1707 } 1708 } 1709 #ifdef CONFIG_QUOTA 1710 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1711 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1712 clear_opt(sbi->s_mount_opt, USRQUOTA); 1713 1714 if 
(test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1715 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1716 1717 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1718 ext4_msg(sb, KERN_ERR, "old and new quota " 1719 "format mixing"); 1720 return 0; 1721 } 1722 1723 if (!sbi->s_jquota_fmt) { 1724 ext4_msg(sb, KERN_ERR, "journaled quota format " 1725 "not specified"); 1726 return 0; 1727 } 1728 } else { 1729 if (sbi->s_jquota_fmt) { 1730 ext4_msg(sb, KERN_ERR, "journaled quota format " 1731 "specified with no journaling " 1732 "enabled"); 1733 return 0; 1734 } 1735 } 1736 #endif 1737 return 1; 1738 } 1739 1740 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1741 int read_only) 1742 { 1743 struct ext4_sb_info *sbi = EXT4_SB(sb); 1744 int res = 0; 1745 1746 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1747 ext4_msg(sb, KERN_ERR, "revision level too high, " 1748 "forcing read-only mode"); 1749 res = MS_RDONLY; 1750 } 1751 if (read_only) 1752 return res; 1753 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1754 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1755 "running e2fsck is recommended"); 1756 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1757 ext4_msg(sb, KERN_WARNING, 1758 "warning: mounting fs with errors, " 1759 "running e2fsck is recommended"); 1760 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1761 le16_to_cpu(es->s_mnt_count) >= 1762 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1763 ext4_msg(sb, KERN_WARNING, 1764 "warning: maximal mount count reached, " 1765 "running e2fsck is recommended"); 1766 else if (le32_to_cpu(es->s_checkinterval) && 1767 (le32_to_cpu(es->s_lastcheck) + 1768 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1769 ext4_msg(sb, KERN_WARNING, 1770 "warning: checktime reached, " 1771 "running e2fsck is recommended"); 1772 if (!sbi->s_journal) 1773 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1774 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1775 
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1776 le16_add_cpu(&es->s_mnt_count, 1); 1777 es->s_mtime = cpu_to_le32(get_seconds()); 1778 ext4_update_dynamic_rev(sb); 1779 if (sbi->s_journal) 1780 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1781 1782 ext4_commit_super(sb, 1); 1783 if (test_opt(sb, DEBUG)) 1784 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1785 "bpg=%lu, ipg=%lu, mo=%04x]\n", 1786 sb->s_blocksize, 1787 sbi->s_groups_count, 1788 EXT4_BLOCKS_PER_GROUP(sb), 1789 EXT4_INODES_PER_GROUP(sb), 1790 sbi->s_mount_opt); 1791 1792 return res; 1793 } 1794 1795 static int ext4_fill_flex_info(struct super_block *sb) 1796 { 1797 struct ext4_sb_info *sbi = EXT4_SB(sb); 1798 struct ext4_group_desc *gdp = NULL; 1799 ext4_group_t flex_group_count; 1800 ext4_group_t flex_group; 1801 int groups_per_flex = 0; 1802 size_t size; 1803 int i; 1804 1805 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1806 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1807 1808 if (groups_per_flex < 2) { 1809 sbi->s_log_groups_per_flex = 0; 1810 return 1; 1811 } 1812 1813 /* We allocate both existing and potentially added groups */ 1814 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1815 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1816 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1817 size = flex_group_count * sizeof(struct flex_groups); 1818 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1819 if (sbi->s_flex_groups == NULL) { 1820 sbi->s_flex_groups = vmalloc(size); 1821 if (sbi->s_flex_groups) 1822 memset(sbi->s_flex_groups, 0, size); 1823 } 1824 if (sbi->s_flex_groups == NULL) { 1825 ext4_msg(sb, KERN_ERR, "not enough memory for " 1826 "%u flex groups", flex_group_count); 1827 goto failed; 1828 } 1829 1830 for (i = 0; i < sbi->s_groups_count; i++) { 1831 gdp = ext4_get_group_desc(sb, i, NULL); 1832 1833 flex_group = ext4_flex_group(sbi, i); 1834 atomic_add(ext4_free_inodes_count(sb, gdp), 1835 
&sbi->s_flex_groups[flex_group].free_inodes); 1836 atomic_add(ext4_free_blks_count(sb, gdp), 1837 &sbi->s_flex_groups[flex_group].free_blocks); 1838 atomic_add(ext4_used_dirs_count(sb, gdp), 1839 &sbi->s_flex_groups[flex_group].used_dirs); 1840 } 1841 1842 return 1; 1843 failed: 1844 return 0; 1845 } 1846 1847 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1848 struct ext4_group_desc *gdp) 1849 { 1850 __u16 crc = 0; 1851 1852 if (sbi->s_es->s_feature_ro_compat & 1853 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1854 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1855 __le32 le_group = cpu_to_le32(block_group); 1856 1857 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1858 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1859 crc = crc16(crc, (__u8 *)gdp, offset); 1860 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1861 /* for checksum of struct ext4_group_desc do the rest...*/ 1862 if ((sbi->s_es->s_feature_incompat & 1863 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1864 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1865 crc = crc16(crc, (__u8 *)gdp + offset, 1866 le16_to_cpu(sbi->s_es->s_desc_size) - 1867 offset); 1868 } 1869 1870 return cpu_to_le16(crc); 1871 } 1872 1873 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1874 struct ext4_group_desc *gdp) 1875 { 1876 if ((sbi->s_es->s_feature_ro_compat & 1877 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 1878 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1879 return 0; 1880 1881 return 1; 1882 } 1883 1884 /* Called at mount-time, super-block is locked */ 1885 static int ext4_check_descriptors(struct super_block *sb) 1886 { 1887 struct ext4_sb_info *sbi = EXT4_SB(sb); 1888 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 1889 ext4_fsblk_t last_block; 1890 ext4_fsblk_t block_bitmap; 1891 ext4_fsblk_t inode_bitmap; 1892 ext4_fsblk_t inode_table; 1893 int flexbg_flag = 0; 
1894 ext4_group_t i; 1895 1896 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1897 flexbg_flag = 1; 1898 1899 ext4_debug("Checking group descriptors"); 1900 1901 for (i = 0; i < sbi->s_groups_count; i++) { 1902 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1903 1904 if (i == sbi->s_groups_count - 1 || flexbg_flag) 1905 last_block = ext4_blocks_count(sbi->s_es) - 1; 1906 else 1907 last_block = first_block + 1908 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1909 1910 block_bitmap = ext4_block_bitmap(sb, gdp); 1911 if (block_bitmap < first_block || block_bitmap > last_block) { 1912 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1913 "Block bitmap for group %u not in group " 1914 "(block %llu)!", i, block_bitmap); 1915 return 0; 1916 } 1917 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1918 if (inode_bitmap < first_block || inode_bitmap > last_block) { 1919 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1920 "Inode bitmap for group %u not in group " 1921 "(block %llu)!", i, inode_bitmap); 1922 return 0; 1923 } 1924 inode_table = ext4_inode_table(sb, gdp); 1925 if (inode_table < first_block || 1926 inode_table + sbi->s_itb_per_group - 1 > last_block) { 1927 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1928 "Inode table for group %u not in group " 1929 "(block %llu)!", i, inode_table); 1930 return 0; 1931 } 1932 ext4_lock_group(sb, i); 1933 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1934 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 1935 "Checksum for group %u failed (%u!=%u)", 1936 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1937 gdp)), le16_to_cpu(gdp->bg_checksum)); 1938 if (!(sb->s_flags & MS_RDONLY)) { 1939 ext4_unlock_group(sb, i); 1940 return 0; 1941 } 1942 } 1943 ext4_unlock_group(sb, i); 1944 if (!flexbg_flag) 1945 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1946 } 1947 1948 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1949 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 1950 
return 1; 1951 } 1952 1953 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 1954 * the superblock) which were deleted from all directories, but held open by 1955 * a process at the time of a crash. We walk the list and try to delete these 1956 * inodes at recovery time (only with a read-write filesystem). 1957 * 1958 * In order to keep the orphan inode chain consistent during traversal (in 1959 * case of crash during recovery), we link each inode into the superblock 1960 * orphan list_head and handle it the same way as an inode deletion during 1961 * normal operation (which journals the operations for us). 1962 * 1963 * We only do an iget() and an iput() on each inode, which is very safe if we 1964 * accidentally point at an in-use or already deleted inode. The worst that 1965 * can happen in this case is that we get a "bit already cleared" message from 1966 * ext4_free_inode(). The only reason we would point at a wrong inode is if 1967 * e2fsck was run on this filesystem, and it must have already done the orphan 1968 * inode cleanup for us, so we can safely abort without any further action. 
1969 */ 1970 static void ext4_orphan_cleanup(struct super_block *sb, 1971 struct ext4_super_block *es) 1972 { 1973 unsigned int s_flags = sb->s_flags; 1974 int nr_orphans = 0, nr_truncates = 0; 1975 #ifdef CONFIG_QUOTA 1976 int i; 1977 #endif 1978 if (!es->s_last_orphan) { 1979 jbd_debug(4, "no orphan inodes to clean up\n"); 1980 return; 1981 } 1982 1983 if (bdev_read_only(sb->s_bdev)) { 1984 ext4_msg(sb, KERN_ERR, "write access " 1985 "unavailable, skipping orphan cleanup"); 1986 return; 1987 } 1988 1989 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 1990 if (es->s_last_orphan) 1991 jbd_debug(1, "Errors on filesystem, " 1992 "clearing orphan list.\n"); 1993 es->s_last_orphan = 0; 1994 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1995 return; 1996 } 1997 1998 if (s_flags & MS_RDONLY) { 1999 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2000 sb->s_flags &= ~MS_RDONLY; 2001 } 2002 #ifdef CONFIG_QUOTA 2003 /* Needed for iput() to work correctly and not trash data */ 2004 sb->s_flags |= MS_ACTIVE; 2005 /* Turn on quotas so that they are updated correctly */ 2006 for (i = 0; i < MAXQUOTAS; i++) { 2007 if (EXT4_SB(sb)->s_qf_names[i]) { 2008 int ret = ext4_quota_on_mount(sb, i); 2009 if (ret < 0) 2010 ext4_msg(sb, KERN_ERR, 2011 "Cannot turn on journaled " 2012 "quota: error %d", ret); 2013 } 2014 } 2015 #endif 2016 2017 while (es->s_last_orphan) { 2018 struct inode *inode; 2019 2020 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2021 if (IS_ERR(inode)) { 2022 es->s_last_orphan = 0; 2023 break; 2024 } 2025 2026 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2027 dquot_initialize(inode); 2028 if (inode->i_nlink) { 2029 ext4_msg(sb, KERN_DEBUG, 2030 "%s: truncating inode %lu to %lld bytes", 2031 __func__, inode->i_ino, inode->i_size); 2032 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2033 inode->i_ino, inode->i_size); 2034 ext4_truncate(inode); 2035 nr_truncates++; 2036 } else { 2037 ext4_msg(sb, 
KERN_DEBUG, 2038 "%s: deleting unreferenced inode %lu", 2039 __func__, inode->i_ino); 2040 jbd_debug(2, "deleting unreferenced inode %lu\n", 2041 inode->i_ino); 2042 nr_orphans++; 2043 } 2044 iput(inode); /* The delete magic happens here! */ 2045 } 2046 2047 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2048 2049 if (nr_orphans) 2050 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2051 PLURAL(nr_orphans)); 2052 if (nr_truncates) 2053 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2054 PLURAL(nr_truncates)); 2055 #ifdef CONFIG_QUOTA 2056 /* Turn quotas off */ 2057 for (i = 0; i < MAXQUOTAS; i++) { 2058 if (sb_dqopt(sb)->files[i]) 2059 dquot_quota_off(sb, i); 2060 } 2061 #endif 2062 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2063 } 2064 2065 /* 2066 * Maximal extent format file size. 2067 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2068 * extent format containers, within a sector_t, and within i_blocks 2069 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2070 * so that won't be a limiting factor. 2071 * 2072 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2073 */ 2074 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2075 { 2076 loff_t res; 2077 loff_t upper_limit = MAX_LFS_FILESIZE; 2078 2079 /* small i_blocks in vfs inode? 
*/ 2080 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2081 /* 2082 * CONFIG_LBDAF is not enabled implies the inode 2083 * i_block represent total blocks in 512 bytes 2084 * 32 == size of vfs inode i_blocks * 8 2085 */ 2086 upper_limit = (1LL << 32) - 1; 2087 2088 /* total blocks in file system block size */ 2089 upper_limit >>= (blkbits - 9); 2090 upper_limit <<= blkbits; 2091 } 2092 2093 /* 32-bit extent-start container, ee_block */ 2094 res = 1LL << 32; 2095 res <<= blkbits; 2096 res -= 1; 2097 2098 /* Sanity check against vm- & vfs- imposed limits */ 2099 if (res > upper_limit) 2100 res = upper_limit; 2101 2102 return res; 2103 } 2104 2105 /* 2106 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2107 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2108 * We need to be 1 filesystem block less than the 2^48 sector limit. 2109 */ 2110 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2111 { 2112 loff_t res = EXT4_NDIR_BLOCKS; 2113 int meta_blocks; 2114 loff_t upper_limit; 2115 /* This is calculated to be the largest file size for a dense, block 2116 * mapped file such that the file's total number of 512-byte sectors, 2117 * including data and all indirect blocks, does not exceed (2^48 - 1). 2118 * 2119 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2120 * number of 512-byte sectors of the file. 
2121 */ 2122 2123 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2124 /* 2125 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2126 * the inode i_block field represents total file blocks in 2127 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2128 */ 2129 upper_limit = (1LL << 32) - 1; 2130 2131 /* total blocks in file system block size */ 2132 upper_limit >>= (bits - 9); 2133 2134 } else { 2135 /* 2136 * We use 48 bit ext4_inode i_blocks 2137 * With EXT4_HUGE_FILE_FL set the i_blocks 2138 * represent total number of blocks in 2139 * file system block size 2140 */ 2141 upper_limit = (1LL << 48) - 1; 2142 2143 } 2144 2145 /* indirect blocks */ 2146 meta_blocks = 1; 2147 /* double indirect blocks */ 2148 meta_blocks += 1 + (1LL << (bits-2)); 2149 /* tripple indirect blocks */ 2150 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2151 2152 upper_limit -= meta_blocks; 2153 upper_limit <<= bits; 2154 2155 res += 1LL << (bits-2); 2156 res += 1LL << (2*(bits-2)); 2157 res += 1LL << (3*(bits-2)); 2158 res <<= bits; 2159 if (res > upper_limit) 2160 res = upper_limit; 2161 2162 if (res > MAX_LFS_FILESIZE) 2163 res = MAX_LFS_FILESIZE; 2164 2165 return res; 2166 } 2167 2168 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2169 ext4_fsblk_t logical_sb_block, int nr) 2170 { 2171 struct ext4_sb_info *sbi = EXT4_SB(sb); 2172 ext4_group_t bg, first_meta_bg; 2173 int has_super = 0; 2174 2175 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2176 2177 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2178 nr < first_meta_bg) 2179 return logical_sb_block + nr + 1; 2180 bg = sbi->s_desc_per_block * nr; 2181 if (ext4_bg_has_super(sb, bg)) 2182 has_super = 1; 2183 2184 return (has_super + ext4_group_first_block_no(sb, bg)); 2185 } 2186 2187 /** 2188 * ext4_get_stripe_size: Get the stripe size. 
2189 * @sbi: In memory super block info 2190 * 2191 * If we have specified it via mount option, then 2192 * use the mount option value. If the value specified at mount time is 2193 * greater than the blocks per group use the super block value. 2194 * If the super block value is greater than blocks per group return 0. 2195 * Allocator needs it be less than blocks per group. 2196 * 2197 */ 2198 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2199 { 2200 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2201 unsigned long stripe_width = 2202 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2203 2204 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2205 return sbi->s_stripe; 2206 2207 if (stripe_width <= sbi->s_blocks_per_group) 2208 return stripe_width; 2209 2210 if (stride <= sbi->s_blocks_per_group) 2211 return stride; 2212 2213 return 0; 2214 } 2215 2216 /* sysfs supprt */ 2217 2218 struct ext4_attr { 2219 struct attribute attr; 2220 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2221 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2222 const char *, size_t); 2223 int offset; 2224 }; 2225 2226 static int parse_strtoul(const char *buf, 2227 unsigned long max, unsigned long *value) 2228 { 2229 char *endp; 2230 2231 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2232 endp = skip_spaces(endp); 2233 if (*endp || *value > max) 2234 return -EINVAL; 2235 2236 return 0; 2237 } 2238 2239 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2240 struct ext4_sb_info *sbi, 2241 char *buf) 2242 { 2243 return snprintf(buf, PAGE_SIZE, "%llu\n", 2244 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2245 } 2246 2247 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2248 struct ext4_sb_info *sbi, char *buf) 2249 { 2250 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2251 2252 return snprintf(buf, PAGE_SIZE, "%lu\n", 2253 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) 
- 2254 sbi->s_sectors_written_start) >> 1); 2255 } 2256 2257 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2258 struct ext4_sb_info *sbi, char *buf) 2259 { 2260 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2261 2262 return snprintf(buf, PAGE_SIZE, "%llu\n", 2263 (unsigned long long)(sbi->s_kbytes_written + 2264 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2265 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2266 } 2267 2268 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2269 struct ext4_sb_info *sbi, 2270 const char *buf, size_t count) 2271 { 2272 unsigned long t; 2273 2274 if (parse_strtoul(buf, 0x40000000, &t)) 2275 return -EINVAL; 2276 2277 if (!is_power_of_2(t)) 2278 return -EINVAL; 2279 2280 sbi->s_inode_readahead_blks = t; 2281 return count; 2282 } 2283 2284 static ssize_t sbi_ui_show(struct ext4_attr *a, 2285 struct ext4_sb_info *sbi, char *buf) 2286 { 2287 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2288 2289 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2290 } 2291 2292 static ssize_t sbi_ui_store(struct ext4_attr *a, 2293 struct ext4_sb_info *sbi, 2294 const char *buf, size_t count) 2295 { 2296 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2297 unsigned long t; 2298 2299 if (parse_strtoul(buf, 0xffffffff, &t)) 2300 return -EINVAL; 2301 *ui = t; 2302 return count; 2303 } 2304 2305 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2306 static struct ext4_attr ext4_attr_##_name = { \ 2307 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2308 .show = _show, \ 2309 .store = _store, \ 2310 .offset = offsetof(struct ext4_sb_info, _elname), \ 2311 } 2312 #define EXT4_ATTR(name, mode, show, store) \ 2313 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2314 2315 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2316 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2317 #define 
EXT4_RW_ATTR_SBI_UI(name, elname) \ 2318 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2319 #define ATTR_LIST(name) &ext4_attr_##name.attr 2320 2321 EXT4_RO_ATTR(delayed_allocation_blocks); 2322 EXT4_RO_ATTR(session_write_kbytes); 2323 EXT4_RO_ATTR(lifetime_write_kbytes); 2324 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2325 inode_readahead_blks_store, s_inode_readahead_blks); 2326 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2327 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2328 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2329 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2330 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2331 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2332 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2333 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2334 2335 static struct attribute *ext4_attrs[] = { 2336 ATTR_LIST(delayed_allocation_blocks), 2337 ATTR_LIST(session_write_kbytes), 2338 ATTR_LIST(lifetime_write_kbytes), 2339 ATTR_LIST(inode_readahead_blks), 2340 ATTR_LIST(inode_goal), 2341 ATTR_LIST(mb_stats), 2342 ATTR_LIST(mb_max_to_scan), 2343 ATTR_LIST(mb_min_to_scan), 2344 ATTR_LIST(mb_order2_req), 2345 ATTR_LIST(mb_stream_req), 2346 ATTR_LIST(mb_group_prealloc), 2347 ATTR_LIST(max_writeback_mb_bump), 2348 NULL, 2349 }; 2350 2351 static ssize_t ext4_attr_show(struct kobject *kobj, 2352 struct attribute *attr, char *buf) 2353 { 2354 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2355 s_kobj); 2356 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2357 2358 return a->show ? 
a->show(a, sbi, buf) : 0; 2359 } 2360 2361 static ssize_t ext4_attr_store(struct kobject *kobj, 2362 struct attribute *attr, 2363 const char *buf, size_t len) 2364 { 2365 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2366 s_kobj); 2367 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2368 2369 return a->store ? a->store(a, sbi, buf, len) : 0; 2370 } 2371 2372 static void ext4_sb_release(struct kobject *kobj) 2373 { 2374 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2375 s_kobj); 2376 complete(&sbi->s_kobj_unregister); 2377 } 2378 2379 2380 static const struct sysfs_ops ext4_attr_ops = { 2381 .show = ext4_attr_show, 2382 .store = ext4_attr_store, 2383 }; 2384 2385 static struct kobj_type ext4_ktype = { 2386 .default_attrs = ext4_attrs, 2387 .sysfs_ops = &ext4_attr_ops, 2388 .release = ext4_sb_release, 2389 }; 2390 2391 /* 2392 * Check whether this filesystem can be mounted based on 2393 * the features present and the RDONLY/RDWR mount requested. 2394 * Returns 1 if this filesystem can be mounted as requested, 2395 * 0 if it cannot be. 
2396 */ 2397 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2398 { 2399 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2400 ext4_msg(sb, KERN_ERR, 2401 "Couldn't mount because of " 2402 "unsupported optional features (%x)", 2403 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2404 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2405 return 0; 2406 } 2407 2408 if (readonly) 2409 return 1; 2410 2411 /* Check that feature set is OK for a read-write mount */ 2412 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2413 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2414 "unsupported optional features (%x)", 2415 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2416 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2417 return 0; 2418 } 2419 /* 2420 * Large file size enabled file system can only be mounted 2421 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2422 */ 2423 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2424 if (sizeof(blkcnt_t) < sizeof(u64)) { 2425 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2426 "cannot be mounted RDWR without " 2427 "CONFIG_LBDAF"); 2428 return 0; 2429 } 2430 } 2431 return 1; 2432 } 2433 2434 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2435 __releases(kernel_lock) 2436 __acquires(kernel_lock) 2437 { 2438 char *orig_data = kstrdup(data, GFP_KERNEL); 2439 struct buffer_head *bh; 2440 struct ext4_super_block *es = NULL; 2441 struct ext4_sb_info *sbi; 2442 ext4_fsblk_t block; 2443 ext4_fsblk_t sb_block = get_sb_block(&data); 2444 ext4_fsblk_t logical_sb_block; 2445 unsigned long offset = 0; 2446 unsigned long journal_devnum = 0; 2447 unsigned long def_mount_opts; 2448 struct inode *root; 2449 char *cp; 2450 const char *descr; 2451 int ret = -EINVAL; 2452 int blocksize; 2453 unsigned int db_count; 2454 unsigned int i; 2455 int needs_recovery, has_huge_files; 2456 __u64 blocks_count; 2457 int err; 2458 unsigned int 
journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 2459 2460 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2461 if (!sbi) 2462 return -ENOMEM; 2463 2464 sbi->s_blockgroup_lock = 2465 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 2466 if (!sbi->s_blockgroup_lock) { 2467 kfree(sbi); 2468 return -ENOMEM; 2469 } 2470 sb->s_fs_info = sbi; 2471 sbi->s_mount_opt = 0; 2472 sbi->s_resuid = EXT4_DEF_RESUID; 2473 sbi->s_resgid = EXT4_DEF_RESGID; 2474 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2475 sbi->s_sb_block = sb_block; 2476 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, 2477 sectors[1]); 2478 2479 unlock_kernel(); 2480 2481 /* Cleanup superblock name */ 2482 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 2483 *cp = '!'; 2484 2485 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 2486 if (!blocksize) { 2487 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 2488 goto out_fail; 2489 } 2490 2491 /* 2492 * The ext4 superblock will not be buffer aligned for other than 1kB 2493 * block sizes. We need to calculate the offset from buffer start. 
2494 */ 2495 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 2496 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2497 offset = do_div(logical_sb_block, blocksize); 2498 } else { 2499 logical_sb_block = sb_block; 2500 } 2501 2502 if (!(bh = sb_bread(sb, logical_sb_block))) { 2503 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 2504 goto out_fail; 2505 } 2506 /* 2507 * Note: s_es must be initialized as soon as possible because 2508 * some ext4 macro-instructions depend on its value 2509 */ 2510 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 2511 sbi->s_es = es; 2512 sb->s_magic = le16_to_cpu(es->s_magic); 2513 if (sb->s_magic != EXT4_SUPER_MAGIC) 2514 goto cantfind_ext4; 2515 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 2516 2517 /* Set defaults before we parse the mount options */ 2518 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2519 if (def_mount_opts & EXT4_DEFM_DEBUG) 2520 set_opt(sbi->s_mount_opt, DEBUG); 2521 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 2522 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 2523 "2.6.38"); 2524 set_opt(sbi->s_mount_opt, GRPID); 2525 } 2526 if (def_mount_opts & EXT4_DEFM_UID16) 2527 set_opt(sbi->s_mount_opt, NO_UID32); 2528 #ifdef CONFIG_EXT4_FS_XATTR 2529 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 2530 set_opt(sbi->s_mount_opt, XATTR_USER); 2531 #endif 2532 #ifdef CONFIG_EXT4_FS_POSIX_ACL 2533 if (def_mount_opts & EXT4_DEFM_ACL) 2534 set_opt(sbi->s_mount_opt, POSIX_ACL); 2535 #endif 2536 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 2537 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2538 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 2539 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2540 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 2541 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2542 2543 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 2544 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 2545 else if 
(le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 2546 set_opt(sbi->s_mount_opt, ERRORS_CONT); 2547 else 2548 set_opt(sbi->s_mount_opt, ERRORS_RO); 2549 2550 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 2551 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 2552 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2553 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2554 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2555 2556 set_opt(sbi->s_mount_opt, BARRIER); 2557 2558 /* 2559 * enable delayed allocation by default 2560 * Use -o nodelalloc to turn it off 2561 */ 2562 if (!IS_EXT3_SB(sb)) 2563 set_opt(sbi->s_mount_opt, DELALLOC); 2564 2565 if (!parse_options((char *) data, sb, &journal_devnum, 2566 &journal_ioprio, NULL, 0)) 2567 goto failed_mount; 2568 2569 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2570 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 2571 2572 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 2573 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 2574 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 2575 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 2576 ext4_msg(sb, KERN_WARNING, 2577 "feature flags set on rev 0 fs, " 2578 "running e2fsck is recommended"); 2579 2580 /* 2581 * Check feature flags regardless of the revision level, since we 2582 * previously didn't change the revision level when setting the flags, 2583 * so there is a chance incompat flags are set on a rev 0 filesystem. 
2584 */ 2585 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 2586 goto failed_mount; 2587 2588 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 2589 2590 if (blocksize < EXT4_MIN_BLOCK_SIZE || 2591 blocksize > EXT4_MAX_BLOCK_SIZE) { 2592 ext4_msg(sb, KERN_ERR, 2593 "Unsupported filesystem blocksize %d", blocksize); 2594 goto failed_mount; 2595 } 2596 2597 if (sb->s_blocksize != blocksize) { 2598 /* Validate the filesystem blocksize */ 2599 if (!sb_set_blocksize(sb, blocksize)) { 2600 ext4_msg(sb, KERN_ERR, "bad block size %d", 2601 blocksize); 2602 goto failed_mount; 2603 } 2604 2605 brelse(bh); 2606 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2607 offset = do_div(logical_sb_block, blocksize); 2608 bh = sb_bread(sb, logical_sb_block); 2609 if (!bh) { 2610 ext4_msg(sb, KERN_ERR, 2611 "Can't read superblock on 2nd try"); 2612 goto failed_mount; 2613 } 2614 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2615 sbi->s_es = es; 2616 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2617 ext4_msg(sb, KERN_ERR, 2618 "Magic mismatch, very weird!"); 2619 goto failed_mount; 2620 } 2621 } 2622 2623 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 2624 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 2625 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 2626 has_huge_files); 2627 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 2628 2629 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 2630 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 2631 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 2632 } else { 2633 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 2634 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 2635 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2636 (!is_power_of_2(sbi->s_inode_size)) || 2637 (sbi->s_inode_size > blocksize)) { 2638 ext4_msg(sb, KERN_ERR, 2639 "unsupported inode size: %d", 2640 sbi->s_inode_size); 2641 goto failed_mount; 2642 } 2643 if (sbi->s_inode_size > 
EXT4_GOOD_OLD_INODE_SIZE) 2644 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 2645 } 2646 2647 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 2648 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 2649 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 2650 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 2651 !is_power_of_2(sbi->s_desc_size)) { 2652 ext4_msg(sb, KERN_ERR, 2653 "unsupported descriptor size %lu", 2654 sbi->s_desc_size); 2655 goto failed_mount; 2656 } 2657 } else 2658 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 2659 2660 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 2661 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 2662 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 2663 goto cantfind_ext4; 2664 2665 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 2666 if (sbi->s_inodes_per_block == 0) 2667 goto cantfind_ext4; 2668 sbi->s_itb_per_group = sbi->s_inodes_per_group / 2669 sbi->s_inodes_per_block; 2670 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 2671 sbi->s_sbh = bh; 2672 sbi->s_mount_state = le16_to_cpu(es->s_state); 2673 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2674 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2675 2676 for (i = 0; i < 4; i++) 2677 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2678 sbi->s_def_hash_version = es->s_def_hash_version; 2679 i = le32_to_cpu(es->s_flags); 2680 if (i & EXT2_FLAGS_UNSIGNED_HASH) 2681 sbi->s_hash_unsigned = 3; 2682 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 2683 #ifdef __CHAR_UNSIGNED__ 2684 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 2685 sbi->s_hash_unsigned = 3; 2686 #else 2687 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 2688 #endif 2689 sb->s_dirt = 1; 2690 } 2691 2692 if (sbi->s_blocks_per_group > blocksize * 8) { 2693 ext4_msg(sb, KERN_ERR, 2694 "#blocks per group too big: %lu", 2695 sbi->s_blocks_per_group); 2696 goto failed_mount; 2697 } 2698 if 
(sbi->s_inodes_per_group > blocksize * 8) { 2699 ext4_msg(sb, KERN_ERR, 2700 "#inodes per group too big: %lu", 2701 sbi->s_inodes_per_group); 2702 goto failed_mount; 2703 } 2704 2705 /* 2706 * Test whether we have more sectors than will fit in sector_t, 2707 * and whether the max offset is addressable by the page cache. 2708 */ 2709 if ((ext4_blocks_count(es) > 2710 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || 2711 (ext4_blocks_count(es) > 2712 (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { 2713 ext4_msg(sb, KERN_ERR, "filesystem" 2714 " too large to mount safely on this system"); 2715 if (sizeof(sector_t) < 8) 2716 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 2717 ret = -EFBIG; 2718 goto failed_mount; 2719 } 2720 2721 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2722 goto cantfind_ext4; 2723 2724 /* check blocks count against device size */ 2725 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 2726 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 2727 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 2728 "exceeds size of device (%llu blocks)", 2729 ext4_blocks_count(es), blocks_count); 2730 goto failed_mount; 2731 } 2732 2733 /* 2734 * It makes no sense for the first data block to be beyond the end 2735 * of the filesystem. 
2736 */ 2737 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 2738 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 2739 "block %u is beyond end of filesystem (%llu)", 2740 le32_to_cpu(es->s_first_data_block), 2741 ext4_blocks_count(es)); 2742 goto failed_mount; 2743 } 2744 blocks_count = (ext4_blocks_count(es) - 2745 le32_to_cpu(es->s_first_data_block) + 2746 EXT4_BLOCKS_PER_GROUP(sb) - 1); 2747 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 2748 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 2749 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 2750 "(block count %llu, first data block %u, " 2751 "blocks per group %lu)", sbi->s_groups_count, 2752 ext4_blocks_count(es), 2753 le32_to_cpu(es->s_first_data_block), 2754 EXT4_BLOCKS_PER_GROUP(sb)); 2755 goto failed_mount; 2756 } 2757 sbi->s_groups_count = blocks_count; 2758 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 2759 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 2760 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2761 EXT4_DESC_PER_BLOCK(sb); 2762 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 2763 GFP_KERNEL); 2764 if (sbi->s_group_desc == NULL) { 2765 ext4_msg(sb, KERN_ERR, "not enough memory"); 2766 goto failed_mount; 2767 } 2768 2769 #ifdef CONFIG_PROC_FS 2770 if (ext4_proc_root) 2771 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2772 #endif 2773 2774 bgl_lock_init(sbi->s_blockgroup_lock); 2775 2776 for (i = 0; i < db_count; i++) { 2777 block = descriptor_loc(sb, logical_sb_block, i); 2778 sbi->s_group_desc[i] = sb_bread(sb, block); 2779 if (!sbi->s_group_desc[i]) { 2780 ext4_msg(sb, KERN_ERR, 2781 "can't read group descriptor %d", i); 2782 db_count = i; 2783 goto failed_mount2; 2784 } 2785 } 2786 if (!ext4_check_descriptors(sb)) { 2787 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 2788 goto failed_mount2; 2789 } 2790 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 
EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2791 if (!ext4_fill_flex_info(sb)) { 2792 ext4_msg(sb, KERN_ERR, 2793 "unable to initialize " 2794 "flex_bg meta info!"); 2795 goto failed_mount2; 2796 } 2797 2798 sbi->s_gdb_count = db_count; 2799 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2800 spin_lock_init(&sbi->s_next_gen_lock); 2801 2802 sbi->s_stripe = ext4_get_stripe_size(sbi); 2803 sbi->s_max_writeback_mb_bump = 128; 2804 2805 /* 2806 * set up enough so that it can read an inode 2807 */ 2808 if (!test_opt(sb, NOLOAD) && 2809 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 2810 sb->s_op = &ext4_sops; 2811 else 2812 sb->s_op = &ext4_nojournal_sops; 2813 sb->s_export_op = &ext4_export_ops; 2814 sb->s_xattr = ext4_xattr_handlers; 2815 #ifdef CONFIG_QUOTA 2816 sb->s_qcop = &ext4_qctl_operations; 2817 sb->dq_op = &ext4_quota_operations; 2818 #endif 2819 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 2820 mutex_init(&sbi->s_orphan_lock); 2821 mutex_init(&sbi->s_resize_lock); 2822 2823 sb->s_root = NULL; 2824 2825 needs_recovery = (es->s_last_orphan != 0 || 2826 EXT4_HAS_INCOMPAT_FEATURE(sb, 2827 EXT4_FEATURE_INCOMPAT_RECOVER)); 2828 2829 /* 2830 * The first inode we look at is the journal inode. Don't try 2831 * root first: it may be modified in the journal! 
2832 */ 2833 if (!test_opt(sb, NOLOAD) && 2834 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2835 if (ext4_load_journal(sb, es, journal_devnum)) 2836 goto failed_mount3; 2837 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2838 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2839 ext4_msg(sb, KERN_ERR, "required journal recovery " 2840 "suppressed and not mounted read-only"); 2841 goto failed_mount_wq; 2842 } else { 2843 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 2844 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 2845 sbi->s_journal = NULL; 2846 needs_recovery = 0; 2847 goto no_journal; 2848 } 2849 2850 if (ext4_blocks_count(es) > 0xffffffffULL && 2851 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 2852 JBD2_FEATURE_INCOMPAT_64BIT)) { 2853 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 2854 goto failed_mount_wq; 2855 } 2856 2857 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2858 jbd2_journal_set_features(sbi->s_journal, 2859 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2860 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2861 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2862 jbd2_journal_set_features(sbi->s_journal, 2863 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2864 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2865 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2866 } else { 2867 jbd2_journal_clear_features(sbi->s_journal, 2868 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2869 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2870 } 2871 2872 /* We have now updated the journal if required, so we can 2873 * validate the data journaling mode. 
*/ 2874 switch (test_opt(sb, DATA_FLAGS)) { 2875 case 0: 2876 /* No mode set, assume a default based on the journal 2877 * capabilities: ORDERED_DATA if the journal can 2878 * cope, else JOURNAL_DATA 2879 */ 2880 if (jbd2_journal_check_available_features 2881 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 2882 set_opt(sbi->s_mount_opt, ORDERED_DATA); 2883 else 2884 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 2885 break; 2886 2887 case EXT4_MOUNT_ORDERED_DATA: 2888 case EXT4_MOUNT_WRITEBACK_DATA: 2889 if (!jbd2_journal_check_available_features 2890 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 2891 ext4_msg(sb, KERN_ERR, "Journal does not support " 2892 "requested data journaling mode"); 2893 goto failed_mount_wq; 2894 } 2895 default: 2896 break; 2897 } 2898 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 2899 2900 no_journal: 2901 err = percpu_counter_init(&sbi->s_freeblocks_counter, 2902 ext4_count_free_blocks(sb)); 2903 if (!err) 2904 err = percpu_counter_init(&sbi->s_freeinodes_counter, 2905 ext4_count_free_inodes(sb)); 2906 if (!err) 2907 err = percpu_counter_init(&sbi->s_dirs_counter, 2908 ext4_count_dirs(sb)); 2909 if (!err) 2910 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 2911 if (err) { 2912 ext4_msg(sb, KERN_ERR, "insufficient memory"); 2913 goto failed_mount_wq; 2914 } 2915 if (test_opt(sb, NOBH)) { 2916 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { 2917 ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " 2918 "its supported only with writeback mode"); 2919 clear_opt(sbi->s_mount_opt, NOBH); 2920 } 2921 if (test_opt(sb, DIOREAD_NOLOCK)) { 2922 ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " 2923 "not supported with nobh mode"); 2924 goto failed_mount_wq; 2925 } 2926 } 2927 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 2928 if (!EXT4_SB(sb)->dio_unwritten_wq) { 2929 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 2930 goto failed_mount_wq; 2931 } 2932 
2933 /* 2934 * The jbd2_journal_load will have done any necessary log recovery, 2935 * so we can safely mount the rest of the filesystem now. 2936 */ 2937 2938 root = ext4_iget(sb, EXT4_ROOT_INO); 2939 if (IS_ERR(root)) { 2940 ext4_msg(sb, KERN_ERR, "get root inode failed"); 2941 ret = PTR_ERR(root); 2942 goto failed_mount4; 2943 } 2944 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2945 iput(root); 2946 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 2947 goto failed_mount4; 2948 } 2949 sb->s_root = d_alloc_root(root); 2950 if (!sb->s_root) { 2951 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 2952 iput(root); 2953 ret = -ENOMEM; 2954 goto failed_mount4; 2955 } 2956 2957 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 2958 2959 /* determine the minimum size of new large inodes, if present */ 2960 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2961 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2962 EXT4_GOOD_OLD_INODE_SIZE; 2963 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2964 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 2965 if (sbi->s_want_extra_isize < 2966 le16_to_cpu(es->s_want_extra_isize)) 2967 sbi->s_want_extra_isize = 2968 le16_to_cpu(es->s_want_extra_isize); 2969 if (sbi->s_want_extra_isize < 2970 le16_to_cpu(es->s_min_extra_isize)) 2971 sbi->s_want_extra_isize = 2972 le16_to_cpu(es->s_min_extra_isize); 2973 } 2974 } 2975 /* Check if enough inode space is available */ 2976 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 2977 sbi->s_inode_size) { 2978 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 2979 EXT4_GOOD_OLD_INODE_SIZE; 2980 ext4_msg(sb, KERN_INFO, "required extra inode space not" 2981 "available"); 2982 } 2983 2984 if (test_opt(sb, DELALLOC) && 2985 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 2986 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 2987 "requested data journaling mode"); 2988 clear_opt(sbi->s_mount_opt, DELALLOC); 2989 } 2990 if (test_opt(sb, DIOREAD_NOLOCK)) { 
2991 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2992 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2993 "option - requested data journaling mode"); 2994 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 2995 } 2996 if (sb->s_blocksize < PAGE_SIZE) { 2997 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 2998 "option - block size is too small"); 2999 clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); 3000 } 3001 } 3002 3003 err = ext4_setup_system_zone(sb); 3004 if (err) { 3005 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3006 "zone (%d)", err); 3007 goto failed_mount4; 3008 } 3009 3010 ext4_ext_init(sb); 3011 err = ext4_mb_init(sb, needs_recovery); 3012 if (err) { 3013 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", 3014 err); 3015 goto failed_mount4; 3016 } 3017 3018 sbi->s_kobj.kset = ext4_kset; 3019 init_completion(&sbi->s_kobj_unregister); 3020 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3021 "%s", sb->s_id); 3022 if (err) { 3023 ext4_mb_release(sb); 3024 ext4_ext_release(sb); 3025 goto failed_mount4; 3026 }; 3027 3028 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3029 ext4_orphan_cleanup(sb, es); 3030 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 3031 if (needs_recovery) { 3032 ext4_msg(sb, KERN_INFO, "recovery complete"); 3033 ext4_mark_recovery_complete(sb, es); 3034 } 3035 if (EXT4_SB(sb)->s_journal) { 3036 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 3037 descr = " journalled data mode"; 3038 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 3039 descr = " ordered data mode"; 3040 else 3041 descr = " writeback data mode"; 3042 } else 3043 descr = "out journal"; 3044 3045 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. 
" 3046 "Opts: %s", descr, orig_data); 3047 3048 lock_kernel(); 3049 kfree(orig_data); 3050 return 0; 3051 3052 cantfind_ext4: 3053 if (!silent) 3054 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3055 goto failed_mount; 3056 3057 failed_mount4: 3058 ext4_msg(sb, KERN_ERR, "mount failed"); 3059 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3060 failed_mount_wq: 3061 ext4_release_system_zone(sb); 3062 if (sbi->s_journal) { 3063 jbd2_journal_destroy(sbi->s_journal); 3064 sbi->s_journal = NULL; 3065 } 3066 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3067 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3068 percpu_counter_destroy(&sbi->s_dirs_counter); 3069 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3070 failed_mount3: 3071 if (sbi->s_flex_groups) { 3072 if (is_vmalloc_addr(sbi->s_flex_groups)) 3073 vfree(sbi->s_flex_groups); 3074 else 3075 kfree(sbi->s_flex_groups); 3076 } 3077 failed_mount2: 3078 for (i = 0; i < db_count; i++) 3079 brelse(sbi->s_group_desc[i]); 3080 kfree(sbi->s_group_desc); 3081 failed_mount: 3082 if (sbi->s_proc) { 3083 remove_proc_entry(sb->s_id, ext4_proc_root); 3084 } 3085 #ifdef CONFIG_QUOTA 3086 for (i = 0; i < MAXQUOTAS; i++) 3087 kfree(sbi->s_qf_names[i]); 3088 #endif 3089 ext4_blkdev_remove(sbi); 3090 brelse(bh); 3091 out_fail: 3092 sb->s_fs_info = NULL; 3093 kfree(sbi->s_blockgroup_lock); 3094 kfree(sbi); 3095 lock_kernel(); 3096 kfree(orig_data); 3097 return ret; 3098 } 3099 3100 /* 3101 * Setup any per-fs journal parameters now. We'll do this both on 3102 * initial mount, once the journal has been initialised but before we've 3103 * done any recovery; and again on any subsequent remount. 
3104 */ 3105 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 3106 { 3107 struct ext4_sb_info *sbi = EXT4_SB(sb); 3108 3109 journal->j_commit_interval = sbi->s_commit_interval; 3110 journal->j_min_batch_time = sbi->s_min_batch_time; 3111 journal->j_max_batch_time = sbi->s_max_batch_time; 3112 3113 spin_lock(&journal->j_state_lock); 3114 if (test_opt(sb, BARRIER)) 3115 journal->j_flags |= JBD2_BARRIER; 3116 else 3117 journal->j_flags &= ~JBD2_BARRIER; 3118 if (test_opt(sb, DATA_ERR_ABORT)) 3119 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3120 else 3121 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3122 spin_unlock(&journal->j_state_lock); 3123 } 3124 3125 static journal_t *ext4_get_journal(struct super_block *sb, 3126 unsigned int journal_inum) 3127 { 3128 struct inode *journal_inode; 3129 journal_t *journal; 3130 3131 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3132 3133 /* First, test for the existence of a valid inode on disk. Bad 3134 * things happen if we iget() an unused inode, as the subsequent 3135 * iput() will try to delete it. 
*/ 3136 3137 journal_inode = ext4_iget(sb, journal_inum); 3138 if (IS_ERR(journal_inode)) { 3139 ext4_msg(sb, KERN_ERR, "no journal found"); 3140 return NULL; 3141 } 3142 if (!journal_inode->i_nlink) { 3143 make_bad_inode(journal_inode); 3144 iput(journal_inode); 3145 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 3146 return NULL; 3147 } 3148 3149 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 3150 journal_inode, journal_inode->i_size); 3151 if (!S_ISREG(journal_inode->i_mode)) { 3152 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 3153 iput(journal_inode); 3154 return NULL; 3155 } 3156 3157 journal = jbd2_journal_init_inode(journal_inode); 3158 if (!journal) { 3159 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3160 iput(journal_inode); 3161 return NULL; 3162 } 3163 journal->j_private = sb; 3164 ext4_init_journal_params(sb, journal); 3165 return journal; 3166 } 3167 3168 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3169 dev_t j_dev) 3170 { 3171 struct buffer_head *bh; 3172 journal_t *journal; 3173 ext4_fsblk_t start; 3174 ext4_fsblk_t len; 3175 int hblock, blocksize; 3176 ext4_fsblk_t sb_block; 3177 unsigned long offset; 3178 struct ext4_super_block *es; 3179 struct block_device *bdev; 3180 3181 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3182 3183 bdev = ext4_blkdev_get(j_dev, sb); 3184 if (bdev == NULL) 3185 return NULL; 3186 3187 if (bd_claim(bdev, sb)) { 3188 ext4_msg(sb, KERN_ERR, 3189 "failed to claim external journal device"); 3190 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 3191 return NULL; 3192 } 3193 3194 blocksize = sb->s_blocksize; 3195 hblock = bdev_logical_block_size(bdev); 3196 if (blocksize < hblock) { 3197 ext4_msg(sb, KERN_ERR, 3198 "blocksize too small for journal device"); 3199 goto out_bdev; 3200 } 3201 3202 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3203 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3204 set_blocksize(bdev, blocksize); 3205 if (!(bh = __bread(bdev, sb_block, 
blocksize))) { 3206 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3207 "external journal"); 3208 goto out_bdev; 3209 } 3210 3211 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3212 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3213 !(le32_to_cpu(es->s_feature_incompat) & 3214 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3215 ext4_msg(sb, KERN_ERR, "external journal has " 3216 "bad superblock"); 3217 brelse(bh); 3218 goto out_bdev; 3219 } 3220 3221 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3222 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3223 brelse(bh); 3224 goto out_bdev; 3225 } 3226 3227 len = ext4_blocks_count(es); 3228 start = sb_block + 1; 3229 brelse(bh); /* we're done with the superblock */ 3230 3231 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3232 start, len, blocksize); 3233 if (!journal) { 3234 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3235 goto out_bdev; 3236 } 3237 journal->j_private = sb; 3238 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3239 wait_on_buffer(journal->j_sb_buffer); 3240 if (!buffer_uptodate(journal->j_sb_buffer)) { 3241 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3242 goto out_journal; 3243 } 3244 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3245 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3246 "user (unsupported) - %d", 3247 be32_to_cpu(journal->j_superblock->s_nr_users)); 3248 goto out_journal; 3249 } 3250 EXT4_SB(sb)->journal_bdev = bdev; 3251 ext4_init_journal_params(sb, journal); 3252 return journal; 3253 3254 out_journal: 3255 jbd2_journal_destroy(journal); 3256 out_bdev: 3257 ext4_blkdev_put(bdev); 3258 return NULL; 3259 } 3260 3261 static int ext4_load_journal(struct super_block *sb, 3262 struct ext4_super_block *es, 3263 unsigned long journal_devnum) 3264 { 3265 journal_t *journal; 3266 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3267 dev_t journal_dev; 3268 int err = 0; 3269 int 
really_read_only; 3270 3271 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3272 3273 if (journal_devnum && 3274 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3275 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3276 "numbers have changed"); 3277 journal_dev = new_decode_dev(journal_devnum); 3278 } else 3279 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3280 3281 really_read_only = bdev_read_only(sb->s_bdev); 3282 3283 /* 3284 * Are we loading a blank journal or performing recovery after a 3285 * crash? For recovery, we need to check in advance whether we 3286 * can get read-write access to the device. 3287 */ 3288 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3289 if (sb->s_flags & MS_RDONLY) { 3290 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3291 "required on readonly filesystem"); 3292 if (really_read_only) { 3293 ext4_msg(sb, KERN_ERR, "write access " 3294 "unavailable, cannot proceed"); 3295 return -EROFS; 3296 } 3297 ext4_msg(sb, KERN_INFO, "write access will " 3298 "be enabled during recovery"); 3299 } 3300 } 3301 3302 if (journal_inum && journal_dev) { 3303 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3304 "and inode journals!"); 3305 return -EINVAL; 3306 } 3307 3308 if (journal_inum) { 3309 if (!(journal = ext4_get_journal(sb, journal_inum))) 3310 return -EINVAL; 3311 } else { 3312 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3313 return -EINVAL; 3314 } 3315 3316 if (!(journal->j_flags & JBD2_BARRIER)) 3317 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3318 3319 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3320 err = jbd2_journal_update_format(journal); 3321 if (err) { 3322 ext4_msg(sb, KERN_ERR, "error updating journal"); 3323 jbd2_journal_destroy(journal); 3324 return err; 3325 } 3326 } 3327 3328 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3329 err = jbd2_journal_wipe(journal, !really_read_only); 3330 if (!err) 3331 err = 
jbd2_journal_load(journal); 3332 3333 if (err) { 3334 ext4_msg(sb, KERN_ERR, "error loading journal"); 3335 jbd2_journal_destroy(journal); 3336 return err; 3337 } 3338 3339 EXT4_SB(sb)->s_journal = journal; 3340 ext4_clear_journal_err(sb, es); 3341 3342 if (journal_devnum && 3343 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3344 es->s_journal_dev = cpu_to_le32(journal_devnum); 3345 3346 /* Make sure we flush the recovery flag to disk. */ 3347 ext4_commit_super(sb, 1); 3348 } 3349 3350 return 0; 3351 } 3352 3353 static int ext4_commit_super(struct super_block *sb, int sync) 3354 { 3355 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3356 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3357 int error = 0; 3358 3359 if (!sbh) 3360 return error; 3361 if (buffer_write_io_error(sbh)) { 3362 /* 3363 * Oh, dear. A previous attempt to write the 3364 * superblock failed. This could happen because the 3365 * USB device was yanked out. Or it could happen to 3366 * be a transient write error and maybe the block will 3367 * be remapped. Nothing we can do but to retry the 3368 * write and hope for the best. 3369 */ 3370 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3371 "superblock detected"); 3372 clear_buffer_write_io_error(sbh); 3373 set_buffer_uptodate(sbh); 3374 } 3375 /* 3376 * If the file system is mounted read-only, don't update the 3377 * superblock write time. This avoids updating the superblock 3378 * write time when we are mounting the root file system 3379 * read/only but we need to replay the journal; at that point, 3380 * for people who are east of GMT and who make their clock 3381 * tick in localtime for Windows bug-for-bug compatibility, 3382 * the clock is set in the future, and this will cause e2fsck 3383 * to complain and force a full file system check. 
3384 */ 3385 if (!(sb->s_flags & MS_RDONLY)) 3386 es->s_wtime = cpu_to_le32(get_seconds()); 3387 es->s_kbytes_written = 3388 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3389 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3390 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3391 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3392 &EXT4_SB(sb)->s_freeblocks_counter)); 3393 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3394 &EXT4_SB(sb)->s_freeinodes_counter)); 3395 sb->s_dirt = 0; 3396 BUFFER_TRACE(sbh, "marking dirty"); 3397 mark_buffer_dirty(sbh); 3398 if (sync) { 3399 error = sync_dirty_buffer(sbh); 3400 if (error) 3401 return error; 3402 3403 error = buffer_write_io_error(sbh); 3404 if (error) { 3405 ext4_msg(sb, KERN_ERR, "I/O error while writing " 3406 "superblock"); 3407 clear_buffer_write_io_error(sbh); 3408 set_buffer_uptodate(sbh); 3409 } 3410 } 3411 return error; 3412 } 3413 3414 /* 3415 * Have we just finished recovery? If so, and if we are mounting (or 3416 * remounting) the filesystem readonly, then we will end up with a 3417 * consistent fs on disk. Record that fact. 
3418 */ 3419 static void ext4_mark_recovery_complete(struct super_block *sb, 3420 struct ext4_super_block *es) 3421 { 3422 journal_t *journal = EXT4_SB(sb)->s_journal; 3423 3424 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3425 BUG_ON(journal != NULL); 3426 return; 3427 } 3428 jbd2_journal_lock_updates(journal); 3429 if (jbd2_journal_flush(journal) < 0) 3430 goto out; 3431 3432 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 3433 sb->s_flags & MS_RDONLY) { 3434 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3435 ext4_commit_super(sb, 1); 3436 } 3437 3438 out: 3439 jbd2_journal_unlock_updates(journal); 3440 } 3441 3442 /* 3443 * If we are mounting (or read-write remounting) a filesystem whose journal 3444 * has recorded an error from a previous lifetime, move that error to the 3445 * main filesystem now. 3446 */ 3447 static void ext4_clear_journal_err(struct super_block *sb, 3448 struct ext4_super_block *es) 3449 { 3450 journal_t *journal; 3451 int j_errno; 3452 const char *errstr; 3453 3454 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3455 3456 journal = EXT4_SB(sb)->s_journal; 3457 3458 /* 3459 * Now check for any error status which may have been recorded in the 3460 * journal by a prior ext4_error() or ext4_abort() 3461 */ 3462 3463 j_errno = jbd2_journal_errno(journal); 3464 if (j_errno) { 3465 char nbuf[16]; 3466 3467 errstr = ext4_decode_error(sb, j_errno, nbuf); 3468 ext4_warning(sb, "Filesystem error recorded " 3469 "from previous mount: %s", errstr); 3470 ext4_warning(sb, "Marking fs in need of filesystem check."); 3471 3472 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 3473 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 3474 ext4_commit_super(sb, 1); 3475 3476 jbd2_journal_clear_err(journal); 3477 } 3478 } 3479 3480 /* 3481 * Force the running and committing transactions to commit, 3482 * and wait on the commit. 
3483 */ 3484 int ext4_force_commit(struct super_block *sb) 3485 { 3486 journal_t *journal; 3487 int ret = 0; 3488 3489 if (sb->s_flags & MS_RDONLY) 3490 return 0; 3491 3492 journal = EXT4_SB(sb)->s_journal; 3493 if (journal) { 3494 vfs_check_frozen(sb, SB_FREEZE_WRITE); 3495 ret = ext4_journal_force_commit(journal); 3496 } 3497 3498 return ret; 3499 } 3500 3501 static void ext4_write_super(struct super_block *sb) 3502 { 3503 lock_super(sb); 3504 ext4_commit_super(sb, 1); 3505 unlock_super(sb); 3506 } 3507 3508 static int ext4_sync_fs(struct super_block *sb, int wait) 3509 { 3510 int ret = 0; 3511 tid_t target; 3512 struct ext4_sb_info *sbi = EXT4_SB(sb); 3513 3514 trace_ext4_sync_fs(sb, wait); 3515 flush_workqueue(sbi->dio_unwritten_wq); 3516 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 3517 if (wait) 3518 jbd2_log_wait_commit(sbi->s_journal, target); 3519 } 3520 return ret; 3521 } 3522 3523 /* 3524 * LVM calls this function before a (read-only) snapshot is created. This 3525 * gives us a chance to flush the journal completely and mark the fs clean. 3526 */ 3527 static int ext4_freeze(struct super_block *sb) 3528 { 3529 int error = 0; 3530 journal_t *journal; 3531 3532 if (sb->s_flags & MS_RDONLY) 3533 return 0; 3534 3535 journal = EXT4_SB(sb)->s_journal; 3536 3537 /* Now we set up the journal barrier. */ 3538 jbd2_journal_lock_updates(journal); 3539 3540 /* 3541 * Don't clear the needs_recovery flag if we failed to flush 3542 * the journal. 3543 */ 3544 error = jbd2_journal_flush(journal); 3545 if (error < 0) 3546 goto out; 3547 3548 /* Journal blocked and flushed, clear needs_recovery flag. */ 3549 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3550 error = ext4_commit_super(sb, 1); 3551 out: 3552 /* we rely on s_frozen to stop further updates */ 3553 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3554 return error; 3555 } 3556 3557 /* 3558 * Called by LVM after the snapshot is done. 
We need to reset the RECOVER 3559 * flag here, even though the filesystem is not technically dirty yet. 3560 */ 3561 static int ext4_unfreeze(struct super_block *sb) 3562 { 3563 if (sb->s_flags & MS_RDONLY) 3564 return 0; 3565 3566 lock_super(sb); 3567 /* Reset the needs_recovery flag before the fs is unlocked. */ 3568 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3569 ext4_commit_super(sb, 1); 3570 unlock_super(sb); 3571 return 0; 3572 } 3573 3574 static int ext4_remount(struct super_block *sb, int *flags, char *data) 3575 { 3576 struct ext4_super_block *es; 3577 struct ext4_sb_info *sbi = EXT4_SB(sb); 3578 ext4_fsblk_t n_blocks_count = 0; 3579 unsigned long old_sb_flags; 3580 struct ext4_mount_options old_opts; 3581 int enable_quota = 0; 3582 ext4_group_t g; 3583 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3584 int err; 3585 #ifdef CONFIG_QUOTA 3586 int i; 3587 #endif 3588 char *orig_data = kstrdup(data, GFP_KERNEL); 3589 3590 lock_kernel(); 3591 3592 /* Store the original options */ 3593 lock_super(sb); 3594 old_sb_flags = sb->s_flags; 3595 old_opts.s_mount_opt = sbi->s_mount_opt; 3596 old_opts.s_resuid = sbi->s_resuid; 3597 old_opts.s_resgid = sbi->s_resgid; 3598 old_opts.s_commit_interval = sbi->s_commit_interval; 3599 old_opts.s_min_batch_time = sbi->s_min_batch_time; 3600 old_opts.s_max_batch_time = sbi->s_max_batch_time; 3601 #ifdef CONFIG_QUOTA 3602 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 3603 for (i = 0; i < MAXQUOTAS; i++) 3604 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 3605 #endif 3606 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 3607 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 3608 3609 /* 3610 * Allow the "check" option to be passed as a remount option. 
3611 */ 3612 if (!parse_options(data, sb, NULL, &journal_ioprio, 3613 &n_blocks_count, 1)) { 3614 err = -EINVAL; 3615 goto restore_opts; 3616 } 3617 3618 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 3619 ext4_abort(sb, __func__, "Abort forced by user"); 3620 3621 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3622 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3623 3624 es = sbi->s_es; 3625 3626 if (sbi->s_journal) { 3627 ext4_init_journal_params(sb, sbi->s_journal); 3628 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3629 } 3630 3631 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3632 n_blocks_count > ext4_blocks_count(es)) { 3633 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 3634 err = -EROFS; 3635 goto restore_opts; 3636 } 3637 3638 if (*flags & MS_RDONLY) { 3639 err = dquot_suspend(sb, -1); 3640 if (err < 0) 3641 goto restore_opts; 3642 3643 /* 3644 * First of all, the unconditional stuff we have to do 3645 * to disable replay of the journal when we next remount 3646 */ 3647 sb->s_flags |= MS_RDONLY; 3648 3649 /* 3650 * OK, test if we are remounting a valid rw partition 3651 * readonly, and if so set the rdonly flag and then 3652 * mark the partition as valid again. 3653 */ 3654 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 3655 (sbi->s_mount_state & EXT4_VALID_FS)) 3656 es->s_state = cpu_to_le16(sbi->s_mount_state); 3657 3658 if (sbi->s_journal) 3659 ext4_mark_recovery_complete(sb, es); 3660 } else { 3661 /* Make sure we can mount this feature set readwrite */ 3662 if (!ext4_feature_set_ok(sb, 0)) { 3663 err = -EROFS; 3664 goto restore_opts; 3665 } 3666 /* 3667 * Make sure the group descriptor checksums 3668 * are sane. If they aren't, refuse to remount r/w. 
3669 */ 3670 for (g = 0; g < sbi->s_groups_count; g++) { 3671 struct ext4_group_desc *gdp = 3672 ext4_get_group_desc(sb, g, NULL); 3673 3674 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 3675 ext4_msg(sb, KERN_ERR, 3676 "ext4_remount: Checksum for group %u failed (%u!=%u)", 3677 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 3678 le16_to_cpu(gdp->bg_checksum)); 3679 err = -EINVAL; 3680 goto restore_opts; 3681 } 3682 } 3683 3684 /* 3685 * If we have an unprocessed orphan list hanging 3686 * around from a previously readonly bdev mount, 3687 * require a full umount/remount for now. 3688 */ 3689 if (es->s_last_orphan) { 3690 ext4_msg(sb, KERN_WARNING, "Couldn't " 3691 "remount RDWR because of unprocessed " 3692 "orphan inode list. Please " 3693 "umount/remount instead"); 3694 err = -EINVAL; 3695 goto restore_opts; 3696 } 3697 3698 /* 3699 * Mounting a RDONLY partition read-write, so reread 3700 * and store the current valid flag. (It may have 3701 * been changed by e2fsck since we originally mounted 3702 * the partition.) 3703 */ 3704 if (sbi->s_journal) 3705 ext4_clear_journal_err(sb, es); 3706 sbi->s_mount_state = le16_to_cpu(es->s_state); 3707 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3708 goto restore_opts; 3709 if (!ext4_setup_super(sb, es, 0)) 3710 sb->s_flags &= ~MS_RDONLY; 3711 enable_quota = 1; 3712 } 3713 } 3714 ext4_setup_system_zone(sb); 3715 if (sbi->s_journal == NULL) 3716 ext4_commit_super(sb, 1); 3717 3718 #ifdef CONFIG_QUOTA 3719 /* Release old quota file names */ 3720 for (i = 0; i < MAXQUOTAS; i++) 3721 if (old_opts.s_qf_names[i] && 3722 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3723 kfree(old_opts.s_qf_names[i]); 3724 #endif 3725 unlock_super(sb); 3726 unlock_kernel(); 3727 if (enable_quota) 3728 dquot_resume(sb, -1); 3729 3730 ext4_msg(sb, KERN_INFO, "re-mounted. 
Opts: %s", orig_data); 3731 kfree(orig_data); 3732 return 0; 3733 3734 restore_opts: 3735 sb->s_flags = old_sb_flags; 3736 sbi->s_mount_opt = old_opts.s_mount_opt; 3737 sbi->s_resuid = old_opts.s_resuid; 3738 sbi->s_resgid = old_opts.s_resgid; 3739 sbi->s_commit_interval = old_opts.s_commit_interval; 3740 sbi->s_min_batch_time = old_opts.s_min_batch_time; 3741 sbi->s_max_batch_time = old_opts.s_max_batch_time; 3742 #ifdef CONFIG_QUOTA 3743 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 3744 for (i = 0; i < MAXQUOTAS; i++) { 3745 if (sbi->s_qf_names[i] && 3746 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 3747 kfree(sbi->s_qf_names[i]); 3748 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 3749 } 3750 #endif 3751 unlock_super(sb); 3752 unlock_kernel(); 3753 kfree(orig_data); 3754 return err; 3755 } 3756 3757 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 3758 { 3759 struct super_block *sb = dentry->d_sb; 3760 struct ext4_sb_info *sbi = EXT4_SB(sb); 3761 struct ext4_super_block *es = sbi->s_es; 3762 u64 fsid; 3763 3764 if (test_opt(sb, MINIX_DF)) { 3765 sbi->s_overhead_last = 0; 3766 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 3767 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3768 ext4_fsblk_t overhead = 0; 3769 3770 /* 3771 * Compute the overhead (FS structures). This is constant 3772 * for a given filesystem unless the number of block groups 3773 * changes so we cache the previous value until it does. 3774 */ 3775 3776 /* 3777 * All of the blocks before first_data_block are 3778 * overhead 3779 */ 3780 overhead = le32_to_cpu(es->s_first_data_block); 3781 3782 /* 3783 * Add the overhead attributed to the superblock and 3784 * block group descriptors. If the sparse superblocks 3785 * feature is turned on, then not all groups have this. 
3786 */ 3787 for (i = 0; i < ngroups; i++) { 3788 overhead += ext4_bg_has_super(sb, i) + 3789 ext4_bg_num_gdb(sb, i); 3790 cond_resched(); 3791 } 3792 3793 /* 3794 * Every block group has an inode bitmap, a block 3795 * bitmap, and an inode table. 3796 */ 3797 overhead += ngroups * (2 + sbi->s_itb_per_group); 3798 sbi->s_overhead_last = overhead; 3799 smp_wmb(); 3800 sbi->s_blocks_last = ext4_blocks_count(es); 3801 } 3802 3803 buf->f_type = EXT4_SUPER_MAGIC; 3804 buf->f_bsize = sb->s_blocksize; 3805 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3806 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3807 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3808 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3809 if (buf->f_bfree < ext4_r_blocks_count(es)) 3810 buf->f_bavail = 0; 3811 buf->f_files = le32_to_cpu(es->s_inodes_count); 3812 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3813 buf->f_namelen = EXT4_NAME_LEN; 3814 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3815 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3816 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 3817 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 3818 3819 return 0; 3820 } 3821 3822 /* Helper function for writing quotas on sync - we need to start transaction 3823 * before quota file is locked for write. 
Otherwise the are possible deadlocks: 3824 * Process 1 Process 2 3825 * ext4_create() quota_sync() 3826 * jbd2_journal_start() write_dquot() 3827 * dquot_initialize() down(dqio_mutex) 3828 * down(dqio_mutex) jbd2_journal_start() 3829 * 3830 */ 3831 3832 #ifdef CONFIG_QUOTA 3833 3834 static inline struct inode *dquot_to_inode(struct dquot *dquot) 3835 { 3836 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 3837 } 3838 3839 static int ext4_write_dquot(struct dquot *dquot) 3840 { 3841 int ret, err; 3842 handle_t *handle; 3843 struct inode *inode; 3844 3845 inode = dquot_to_inode(dquot); 3846 handle = ext4_journal_start(inode, 3847 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 3848 if (IS_ERR(handle)) 3849 return PTR_ERR(handle); 3850 ret = dquot_commit(dquot); 3851 err = ext4_journal_stop(handle); 3852 if (!ret) 3853 ret = err; 3854 return ret; 3855 } 3856 3857 static int ext4_acquire_dquot(struct dquot *dquot) 3858 { 3859 int ret, err; 3860 handle_t *handle; 3861 3862 handle = ext4_journal_start(dquot_to_inode(dquot), 3863 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 3864 if (IS_ERR(handle)) 3865 return PTR_ERR(handle); 3866 ret = dquot_acquire(dquot); 3867 err = ext4_journal_stop(handle); 3868 if (!ret) 3869 ret = err; 3870 return ret; 3871 } 3872 3873 static int ext4_release_dquot(struct dquot *dquot) 3874 { 3875 int ret, err; 3876 handle_t *handle; 3877 3878 handle = ext4_journal_start(dquot_to_inode(dquot), 3879 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 3880 if (IS_ERR(handle)) { 3881 /* Release dquot anyway to avoid endless cycle in dqput() */ 3882 dquot_release(dquot); 3883 return PTR_ERR(handle); 3884 } 3885 ret = dquot_release(dquot); 3886 err = ext4_journal_stop(handle); 3887 if (!ret) 3888 ret = err; 3889 return ret; 3890 } 3891 3892 static int ext4_mark_dquot_dirty(struct dquot *dquot) 3893 { 3894 /* Are we journaling quotas? 
*/ 3895 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3896 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3897 dquot_mark_dquot_dirty(dquot); 3898 return ext4_write_dquot(dquot); 3899 } else { 3900 return dquot_mark_dquot_dirty(dquot); 3901 } 3902 } 3903 3904 static int ext4_write_info(struct super_block *sb, int type) 3905 { 3906 int ret, err; 3907 handle_t *handle; 3908 3909 /* Data block + inode block */ 3910 handle = ext4_journal_start(sb->s_root->d_inode, 2); 3911 if (IS_ERR(handle)) 3912 return PTR_ERR(handle); 3913 ret = dquot_commit_info(sb, type); 3914 err = ext4_journal_stop(handle); 3915 if (!ret) 3916 ret = err; 3917 return ret; 3918 } 3919 3920 /* 3921 * Turn on quotas during mount time - we need to find 3922 * the quota file and such... 3923 */ 3924 static int ext4_quota_on_mount(struct super_block *sb, int type) 3925 { 3926 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 3927 EXT4_SB(sb)->s_jquota_fmt, type); 3928 } 3929 3930 /* 3931 * Standard function to be called on quota_on 3932 */ 3933 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3934 char *name) 3935 { 3936 int err; 3937 struct path path; 3938 3939 if (!test_opt(sb, QUOTA)) 3940 return -EINVAL; 3941 3942 err = kern_path(name, LOOKUP_FOLLOW, &path); 3943 if (err) 3944 return err; 3945 3946 /* Quotafile not on the same filesystem? */ 3947 if (path.mnt->mnt_sb != sb) { 3948 path_put(&path); 3949 return -EXDEV; 3950 } 3951 /* Journaling quota? */ 3952 if (EXT4_SB(sb)->s_qf_names[type]) { 3953 /* Quotafile not in fs root? */ 3954 if (path.dentry->d_parent != sb->s_root) 3955 ext4_msg(sb, KERN_WARNING, 3956 "Quota file not on filesystem root. " 3957 "Journaled quota will not work"); 3958 } 3959 3960 /* 3961 * When we journal data on quota file, we have to flush journal to see 3962 * all updates to the file when we bypass pagecache... 
3963 */ 3964 if (EXT4_SB(sb)->s_journal && 3965 ext4_should_journal_data(path.dentry->d_inode)) { 3966 /* 3967 * We don't need to lock updates but journal_flush() could 3968 * otherwise be livelocked... 3969 */ 3970 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3971 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3972 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3973 if (err) { 3974 path_put(&path); 3975 return err; 3976 } 3977 } 3978 3979 err = dquot_quota_on_path(sb, type, format_id, &path); 3980 path_put(&path); 3981 return err; 3982 } 3983 3984 /* Read data from quotafile - avoid pagecache and such because we cannot afford 3985 * acquiring the locks... As quota files are never truncated and quota code 3986 * itself serializes the operations (and noone else should touch the files) 3987 * we don't have to be afraid of races */ 3988 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 3989 size_t len, loff_t off) 3990 { 3991 struct inode *inode = sb_dqopt(sb)->files[type]; 3992 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 3993 int err = 0; 3994 int offset = off & (sb->s_blocksize - 1); 3995 int tocopy; 3996 size_t toread; 3997 struct buffer_head *bh; 3998 loff_t i_size = i_size_read(inode); 3999 4000 if (off > i_size) 4001 return 0; 4002 if (off+len > i_size) 4003 len = i_size-off; 4004 toread = len; 4005 while (toread > 0) { 4006 tocopy = sb->s_blocksize - offset < toread ? 4007 sb->s_blocksize - offset : toread; 4008 bh = ext4_bread(NULL, inode, blk, 0, &err); 4009 if (err) 4010 return err; 4011 if (!bh) /* A hole? 
*/ 4012 memset(data, 0, tocopy); 4013 else 4014 memcpy(data, bh->b_data+offset, tocopy); 4015 brelse(bh); 4016 offset = 0; 4017 toread -= tocopy; 4018 data += tocopy; 4019 blk++; 4020 } 4021 return len; 4022 } 4023 4024 /* Write to quotafile (we know the transaction is already started and has 4025 * enough credits) */ 4026 static ssize_t ext4_quota_write(struct super_block *sb, int type, 4027 const char *data, size_t len, loff_t off) 4028 { 4029 struct inode *inode = sb_dqopt(sb)->files[type]; 4030 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4031 int err = 0; 4032 int offset = off & (sb->s_blocksize - 1); 4033 int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; 4034 struct buffer_head *bh; 4035 handle_t *handle = journal_current_handle(); 4036 4037 if (EXT4_SB(sb)->s_journal && !handle) { 4038 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4039 " cancelled because transaction is not started", 4040 (unsigned long long)off, (unsigned long long)len); 4041 return -EIO; 4042 } 4043 /* 4044 * Since we account only one data block in transaction credits, 4045 * then it is impossible to cross a block boundary. 
4046 */ 4047 if (sb->s_blocksize - offset < len) { 4048 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4049 " cancelled because not block aligned", 4050 (unsigned long long)off, (unsigned long long)len); 4051 return -EIO; 4052 } 4053 4054 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4055 bh = ext4_bread(handle, inode, blk, 1, &err); 4056 if (!bh) 4057 goto out; 4058 if (journal_quota) { 4059 err = ext4_journal_get_write_access(handle, bh); 4060 if (err) { 4061 brelse(bh); 4062 goto out; 4063 } 4064 } 4065 lock_buffer(bh); 4066 memcpy(bh->b_data+offset, data, len); 4067 flush_dcache_page(bh->b_page); 4068 unlock_buffer(bh); 4069 if (journal_quota) 4070 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4071 else { 4072 /* Always do at least ordered writes for quotas */ 4073 err = ext4_jbd2_file_inode(handle, inode); 4074 mark_buffer_dirty(bh); 4075 } 4076 brelse(bh); 4077 out: 4078 if (err) { 4079 mutex_unlock(&inode->i_mutex); 4080 return err; 4081 } 4082 if (inode->i_size < off + len) { 4083 i_size_write(inode, off + len); 4084 EXT4_I(inode)->i_disksize = inode->i_size; 4085 } 4086 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4087 ext4_mark_inode_dirty(handle, inode); 4088 mutex_unlock(&inode->i_mutex); 4089 return len; 4090 } 4091 4092 #endif 4093 4094 static int ext4_get_sb(struct file_system_type *fs_type, int flags, 4095 const char *dev_name, void *data, struct vfsmount *mnt) 4096 { 4097 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 4098 } 4099 4100 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4101 static struct file_system_type ext2_fs_type = { 4102 .owner = THIS_MODULE, 4103 .name = "ext2", 4104 .get_sb = ext4_get_sb, 4105 .kill_sb = kill_block_super, 4106 .fs_flags = FS_REQUIRES_DEV, 4107 }; 4108 4109 static inline void register_as_ext2(void) 4110 { 4111 int err = register_filesystem(&ext2_fs_type); 4112 if (err) 4113 printk(KERN_WARNING 4114 
"EXT4-fs: Unable to register as ext2 (%d)\n", err); 4115 } 4116 4117 static inline void unregister_as_ext2(void) 4118 { 4119 unregister_filesystem(&ext2_fs_type); 4120 } 4121 MODULE_ALIAS("ext2"); 4122 #else 4123 static inline void register_as_ext2(void) { } 4124 static inline void unregister_as_ext2(void) { } 4125 #endif 4126 4127 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4128 static inline void register_as_ext3(void) 4129 { 4130 int err = register_filesystem(&ext3_fs_type); 4131 if (err) 4132 printk(KERN_WARNING 4133 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4134 } 4135 4136 static inline void unregister_as_ext3(void) 4137 { 4138 unregister_filesystem(&ext3_fs_type); 4139 } 4140 MODULE_ALIAS("ext3"); 4141 #else 4142 static inline void register_as_ext3(void) { } 4143 static inline void unregister_as_ext3(void) { } 4144 #endif 4145 4146 static struct file_system_type ext4_fs_type = { 4147 .owner = THIS_MODULE, 4148 .name = "ext4", 4149 .get_sb = ext4_get_sb, 4150 .kill_sb = kill_block_super, 4151 .fs_flags = FS_REQUIRES_DEV, 4152 }; 4153 4154 static int __init init_ext4_fs(void) 4155 { 4156 int err; 4157 4158 ext4_check_flag_values(); 4159 err = init_ext4_system_zone(); 4160 if (err) 4161 return err; 4162 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4163 if (!ext4_kset) 4164 goto out4; 4165 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4166 err = init_ext4_mballoc(); 4167 if (err) 4168 goto out3; 4169 4170 err = init_ext4_xattr(); 4171 if (err) 4172 goto out2; 4173 err = init_inodecache(); 4174 if (err) 4175 goto out1; 4176 register_as_ext2(); 4177 register_as_ext3(); 4178 err = register_filesystem(&ext4_fs_type); 4179 if (err) 4180 goto out; 4181 return 0; 4182 out: 4183 unregister_as_ext2(); 4184 unregister_as_ext3(); 4185 destroy_inodecache(); 4186 out1: 4187 exit_ext4_xattr(); 4188 out2: 4189 exit_ext4_mballoc(); 4190 out3: 4191 remove_proc_entry("fs/ext4", NULL); 4192 
kset_unregister(ext4_kset); 4193 out4: 4194 exit_ext4_system_zone(); 4195 return err; 4196 } 4197 4198 static void __exit exit_ext4_fs(void) 4199 { 4200 unregister_as_ext2(); 4201 unregister_as_ext3(); 4202 unregister_filesystem(&ext4_fs_type); 4203 destroy_inodecache(); 4204 exit_ext4_xattr(); 4205 exit_ext4_mballoc(); 4206 remove_proc_entry("fs/ext4", NULL); 4207 kset_unregister(ext4_kset); 4208 exit_ext4_system_zone(); 4209 } 4210 4211 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4212 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4213 MODULE_LICENSE("GPL"); 4214 module_init(init_ext4_fs) 4215 module_exit(exit_ext4_fs) 4216