1 /* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19 #include <linux/module.h> 20 #include <linux/string.h> 21 #include <linux/fs.h> 22 #include <linux/time.h> 23 #include <linux/vmalloc.h> 24 #include <linux/jbd2.h> 25 #include <linux/slab.h> 26 #include <linux/init.h> 27 #include <linux/blkdev.h> 28 #include <linux/parser.h> 29 #include <linux/buffer_head.h> 30 #include <linux/exportfs.h> 31 #include <linux/vfs.h> 32 #include <linux/random.h> 33 #include <linux/mount.h> 34 #include <linux/namei.h> 35 #include <linux/quotaops.h> 36 #include <linux/seq_file.h> 37 #include <linux/proc_fs.h> 38 #include <linux/ctype.h> 39 #include <linux/log2.h> 40 #include <linux/crc16.h> 41 #include <asm/uaccess.h> 42 43 #include <linux/kthread.h> 44 #include <linux/freezer.h> 45 46 #include "ext4.h" 47 #include "ext4_jbd2.h" 48 #include "xattr.h" 49 #include "acl.h" 50 #include "mballoc.h" 51 52 #define CREATE_TRACE_POINTS 53 #include <trace/events/ext4.h> 54 55 static struct proc_dir_entry *ext4_proc_root; 56 static struct kset *ext4_kset; 57 struct ext4_lazy_init *ext4_li_info; 58 struct mutex ext4_li_mtx; 59 struct ext4_features *ext4_feat; 60 61 static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 62 unsigned long journal_devnum); 63 static int ext4_commit_super(struct super_block *sb, int sync); 64 static void ext4_mark_recovery_complete(struct super_block *sb, 65 struct ext4_super_block *es); 66 static void ext4_clear_journal_err(struct super_block *sb, 67 struct ext4_super_block *es); 68 static int ext4_sync_fs(struct super_block *sb, int wait); 69 static const char *ext4_decode_error(struct super_block *sb, int errno, 70 char nbuf[16]); 71 static int ext4_remount(struct super_block *sb, int *flags, char *data); 72 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 73 static int ext4_unfreeze(struct super_block *sb); 74 static void ext4_write_super(struct super_block *sb); 75 static int ext4_freeze(struct super_block *sb); 76 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 77 const char *dev_name, void *data); 78 static void ext4_destroy_lazyinit_thread(void); 79 static void ext4_unregister_li_request(struct super_block *sb); 80 81 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82 static struct file_system_type ext3_fs_type = { 83 .owner = THIS_MODULE, 84 .name = "ext3", 85 .mount = ext4_mount, 86 .kill_sb = kill_block_super, 87 .fs_flags = FS_REQUIRES_DEV, 88 }; 89 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 90 #else 91 #define IS_EXT3_SB(sb) (0) 92 #endif 93 94 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 95 struct ext4_group_desc *bg) 96 { 97 return le32_to_cpu(bg->bg_block_bitmap_lo) | 98 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 99 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 100 } 101 102 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 103 struct ext4_group_desc *bg) 104 { 105 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 106 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 107 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 108 } 109 110 ext4_fsblk_t ext4_inode_table(struct super_block *sb, 111 struct ext4_group_desc *bg) 112 { 113 return le32_to_cpu(bg->bg_inode_table_lo) | 114 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 115 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 116 } 117 118 __u32 ext4_free_blks_count(struct super_block *sb, 119 struct ext4_group_desc *bg) 120 { 121 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 122 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 123 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 124 } 125 126 __u32 ext4_free_inodes_count(struct super_block *sb, 127 struct ext4_group_desc *bg) 128 { 129 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 130 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 131 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 132 } 133 134 __u32 ext4_used_dirs_count(struct super_block *sb, 135 struct ext4_group_desc *bg) 136 { 137 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 138 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 139 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 140 } 141 142 __u32 ext4_itable_unused_count(struct super_block *sb, 143 struct ext4_group_desc *bg) 144 { 145 return le16_to_cpu(bg->bg_itable_unused_lo) | 146 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 147 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 148 } 149 150 void ext4_block_bitmap_set(struct super_block *sb, 151 struct ext4_group_desc *bg, ext4_fsblk_t blk) 152 { 153 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 154 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 155 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 156 } 157 158 void ext4_inode_bitmap_set(struct super_block *sb, 159 struct ext4_group_desc *bg, ext4_fsblk_t blk) 160 { 161 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 162 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 163 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 164 } 165 166 void ext4_inode_table_set(struct super_block *sb, 167 struct ext4_group_desc *bg, ext4_fsblk_t blk) 168 { 169 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 170 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 171 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 172 } 173 174 void ext4_free_blks_set(struct super_block *sb, 175 struct ext4_group_desc *bg, __u32 count) 176 { 177 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 178 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 179 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 180 } 181 182 void ext4_free_inodes_set(struct super_block *sb, 183 struct ext4_group_desc *bg, __u32 count) 184 { 185 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 186 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 187 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 188 } 189 190 void ext4_used_dirs_set(struct super_block *sb, 191 struct ext4_group_desc *bg, __u32 count) 192 { 193 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 194 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 195 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); 196 } 197 198 void ext4_itable_unused_set(struct super_block *sb, 199 struct ext4_group_desc *bg, __u32 count) 200 { 201 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 202 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 203 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 204 } 205 206 207 /* Just increment the non-pointer handle value */ 208 static handle_t *ext4_get_nojournal(void) 209 { 210 handle_t *handle = current->journal_info; 211 unsigned long ref_cnt = (unsigned long)handle; 212 213 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 214 215 ref_cnt++; 216 handle = (handle_t *)ref_cnt; 217 218 current->journal_info = handle; 219 return handle; 220 } 221 222 223 /* Decrement the non-pointer handle value */ 224 static void ext4_put_nojournal(handle_t *handle) 225 { 226 unsigned long ref_cnt = (unsigned long)handle; 227 228 BUG_ON(ref_cnt == 0); 229 230 ref_cnt--; 231 handle = (handle_t *)ref_cnt; 232 233 current->journal_info = handle; 234 } 235 236 /* 237 * Wrappers for jbd2_journal_start/end. 238 * 239 * The only special thing we need to do here is to make sure that all 240 * journal_end calls result in the superblock being marked dirty, so 241 * that sync() will call the filesystem's write_super callback if 242 * appropriate. 243 */ 244 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 245 { 246 journal_t *journal; 247 248 if (sb->s_flags & MS_RDONLY) 249 return ERR_PTR(-EROFS); 250 251 vfs_check_frozen(sb, SB_FREEZE_TRANS); 252 /* Special case here: if the journal has aborted behind our 253 * backs (eg. EIO in the commit thread), then we still need to 254 * take the FS itself readonly cleanly. */ 255 journal = EXT4_SB(sb)->s_journal; 256 if (journal) { 257 if (is_journal_aborted(journal)) { 258 ext4_abort(sb, "Detected aborted journal"); 259 return ERR_PTR(-EROFS); 260 } 261 return jbd2_journal_start(journal, nblocks); 262 } 263 return ext4_get_nojournal(); 264 } 265 266 /* 267 * The only special thing we need to do here is to make sure that all 268 * jbd2_journal_stop calls result in the superblock being marked dirty, so 269 * that sync() will call the filesystem's write_super callback if 270 * appropriate. 271 */ 272 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) 273 { 274 struct super_block *sb; 275 int err; 276 int rc; 277 278 if (!ext4_handle_valid(handle)) { 279 ext4_put_nojournal(handle); 280 return 0; 281 } 282 sb = handle->h_transaction->t_journal->j_private; 283 err = handle->h_err; 284 rc = jbd2_journal_stop(handle); 285 286 if (!err) 287 err = rc; 288 if (err) 289 __ext4_std_error(sb, where, line, err); 290 return err; 291 } 292 293 void ext4_journal_abort_handle(const char *caller, unsigned int line, 294 const char *err_fn, struct buffer_head *bh, 295 handle_t *handle, int err) 296 { 297 char nbuf[16]; 298 const char *errstr = ext4_decode_error(NULL, err, nbuf); 299 300 BUG_ON(!ext4_handle_valid(handle)); 301 302 if (bh) 303 BUFFER_TRACE(bh, "abort"); 304 305 if (!handle->h_err) 306 handle->h_err = err; 307 308 if (is_handle_aborted(handle)) 309 return; 310 311 printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 312 caller, line, errstr, err_fn); 313 314 jbd2_journal_abort_handle(handle); 315 } 316 317 static void __save_error_info(struct super_block *sb, const char *func, 318 unsigned int line) 319 { 320 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 321 322 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 323 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 324 es->s_last_error_time = cpu_to_le32(get_seconds()); 325 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 326 es->s_last_error_line = cpu_to_le32(line); 327 if (!es->s_first_error_time) { 328 es->s_first_error_time = es->s_last_error_time; 329 strncpy(es->s_first_error_func, func, 330 sizeof(es->s_first_error_func)); 331 es->s_first_error_line = cpu_to_le32(line); 332 es->s_first_error_ino = es->s_last_error_ino; 333 es->s_first_error_block = es->s_last_error_block; 334 } 335 /* 336 * Start the daily error reporting function if it hasn't been 337 * started already 338 */ 339 if (!es->s_error_count) 340 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 341 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); 342 } 343 344 static void save_error_info(struct super_block *sb, const char *func, 345 unsigned int line) 346 { 347 __save_error_info(sb, func, line); 348 ext4_commit_super(sb, 1); 349 } 350 351 352 /* Deal with the reporting of failure conditions on a filesystem such as 353 * inconsistencies detected or read IO failures. 354 * 355 * On ext2, we can store the error state of the filesystem in the 356 * superblock. That is not possible on ext4, because we may have other 357 * write ordering constraints on the superblock which prevent us from 358 * writing it out straight away; and given that the journal is about to 359 * be aborted, we can't rely on the current, or future, transactions to 360 * write out the superblock safely. 361 * 362 * We'll just use the jbd2_journal_abort() error code to record an error in 363 * the journal instead. On recovery, the journal will complain about 364 * that error until we've noted it down and cleared it. 365 */ 366 367 static void ext4_handle_error(struct super_block *sb) 368 { 369 if (sb->s_flags & MS_RDONLY) 370 return; 371 372 if (!test_opt(sb, ERRORS_CONT)) { 373 journal_t *journal = EXT4_SB(sb)->s_journal; 374 375 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 376 if (journal) 377 jbd2_journal_abort(journal, -EIO); 378 } 379 if (test_opt(sb, ERRORS_RO)) { 380 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 381 sb->s_flags |= MS_RDONLY; 382 } 383 if (test_opt(sb, ERRORS_PANIC)) 384 panic("EXT4-fs (device %s): panic forced after error\n", 385 sb->s_id); 386 } 387 388 void __ext4_error(struct super_block *sb, const char *function, 389 unsigned int line, const char *fmt, ...) 390 { 391 struct va_format vaf; 392 va_list args; 393 394 va_start(args, fmt); 395 vaf.fmt = fmt; 396 vaf.va = &args; 397 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 398 sb->s_id, function, line, current->comm, &vaf); 399 va_end(args); 400 401 ext4_handle_error(sb); 402 } 403 404 void ext4_error_inode(struct inode *inode, const char *function, 405 unsigned int line, ext4_fsblk_t block, 406 const char *fmt, ...) 407 { 408 va_list args; 409 struct va_format vaf; 410 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 411 412 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 413 es->s_last_error_block = cpu_to_le64(block); 414 save_error_info(inode->i_sb, function, line); 415 va_start(args, fmt); 416 vaf.fmt = fmt; 417 vaf.va = &args; 418 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 419 inode->i_sb->s_id, function, line, inode->i_ino); 420 if (block) 421 printk(KERN_CONT "block %llu: ", block); 422 printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 423 va_end(args); 424 425 ext4_handle_error(inode->i_sb); 426 } 427 428 void ext4_error_file(struct file *file, const char *function, 429 unsigned int line, ext4_fsblk_t block, 430 const char *fmt, ...) 431 { 432 va_list args; 433 struct va_format vaf; 434 struct ext4_super_block *es; 435 struct inode *inode = file->f_dentry->d_inode; 436 char pathname[80], *path; 437 438 es = EXT4_SB(inode->i_sb)->s_es; 439 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 440 save_error_info(inode->i_sb, function, line); 441 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 442 if (IS_ERR(path)) 443 path = "(unknown)"; 444 printk(KERN_CRIT 445 "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 446 inode->i_sb->s_id, function, line, inode->i_ino); 447 if (block) 448 printk(KERN_CONT "block %llu: ", block); 449 va_start(args, fmt); 450 vaf.fmt = fmt; 451 vaf.va = &args; 452 printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 453 va_end(args); 454 455 ext4_handle_error(inode->i_sb); 456 } 457 458 static const char *ext4_decode_error(struct super_block *sb, int errno, 459 char nbuf[16]) 460 { 461 char *errstr = NULL; 462 463 switch (errno) { 464 case -EIO: 465 errstr = "IO failure"; 466 break; 467 case -ENOMEM: 468 errstr = "Out of memory"; 469 break; 470 case -EROFS: 471 if (!sb || (EXT4_SB(sb)->s_journal && 472 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 473 errstr = "Journal has aborted"; 474 else 475 errstr = "Readonly filesystem"; 476 break; 477 default: 478 /* If the caller passed in an extra buffer for unknown 479 * errors, textualise them now. Else we just return 480 * NULL. */ 481 if (nbuf) { 482 /* Check for truncated error codes... */ 483 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 484 errstr = nbuf; 485 } 486 break; 487 } 488 489 return errstr; 490 } 491 492 /* __ext4_std_error decodes expected errors from journaling functions 493 * automatically and invokes the appropriate error response. */ 494 495 void __ext4_std_error(struct super_block *sb, const char *function, 496 unsigned int line, int errno) 497 { 498 char nbuf[16]; 499 const char *errstr; 500 501 /* Special case: if the error is EROFS, and we're not already 502 * inside a transaction, then there's really no point in logging 503 * an error. */ 504 if (errno == -EROFS && journal_current_handle() == NULL && 505 (sb->s_flags & MS_RDONLY)) 506 return; 507 508 errstr = ext4_decode_error(sb, errno, nbuf); 509 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", 510 sb->s_id, function, line, errstr); 511 save_error_info(sb, function, line); 512 513 ext4_handle_error(sb); 514 } 515 516 /* 517 * ext4_abort is a much stronger failure handler than ext4_error. The 518 * abort function may be used to deal with unrecoverable failures such 519 * as journal IO errors or ENOMEM at a critical moment in log management. 520 * 521 * We unconditionally force the filesystem into an ABORT|READONLY state, 522 * unless the error response on the fs has been set to panic in which 523 * case we take the easy way out and panic immediately. 524 */ 525 526 void __ext4_abort(struct super_block *sb, const char *function, 527 unsigned int line, const char *fmt, ...) 528 { 529 va_list args; 530 531 save_error_info(sb, function, line); 532 va_start(args, fmt); 533 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, 534 function, line); 535 vprintk(fmt, args); 536 printk("\n"); 537 va_end(args); 538 539 if ((sb->s_flags & MS_RDONLY) == 0) { 540 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 541 sb->s_flags |= MS_RDONLY; 542 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 543 if (EXT4_SB(sb)->s_journal) 544 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 545 save_error_info(sb, function, line); 546 } 547 if (test_opt(sb, ERRORS_PANIC)) 548 panic("EXT4-fs panic from previous error\n"); 549 } 550 551 void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) 552 { 553 struct va_format vaf; 554 va_list args; 555 556 va_start(args, fmt); 557 vaf.fmt = fmt; 558 vaf.va = &args; 559 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 560 va_end(args); 561 } 562 563 void __ext4_warning(struct super_block *sb, const char *function, 564 unsigned int line, const char *fmt, ...) 565 { 566 struct va_format vaf; 567 va_list args; 568 569 va_start(args, fmt); 570 vaf.fmt = fmt; 571 vaf.va = &args; 572 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", 573 sb->s_id, function, line, &vaf); 574 va_end(args); 575 } 576 577 void __ext4_grp_locked_error(const char *function, unsigned int line, 578 struct super_block *sb, ext4_group_t grp, 579 unsigned long ino, ext4_fsblk_t block, 580 const char *fmt, ...) 581 __releases(bitlock) 582 __acquires(bitlock) 583 { 584 struct va_format vaf; 585 va_list args; 586 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 587 588 es->s_last_error_ino = cpu_to_le32(ino); 589 es->s_last_error_block = cpu_to_le64(block); 590 __save_error_info(sb, function, line); 591 592 va_start(args, fmt); 593 594 vaf.fmt = fmt; 595 vaf.va = &args; 596 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", 597 sb->s_id, function, line, grp); 598 if (ino) 599 printk(KERN_CONT "inode %lu: ", ino); 600 if (block) 601 printk(KERN_CONT "block %llu:", (unsigned long long) block); 602 printk(KERN_CONT "%pV\n", &vaf); 603 va_end(args); 604 605 if (test_opt(sb, ERRORS_CONT)) { 606 ext4_commit_super(sb, 0); 607 return; 608 } 609 610 ext4_unlock_group(sb, grp); 611 ext4_handle_error(sb); 612 /* 613 * We only get here in the ERRORS_RO case; relocking the group 614 * may be dangerous, but nothing bad will happen since the 615 * filesystem will have already been marked read/only and the 616 * journal has been aborted. We return 1 as a hint to callers 617 * who might what to use the return value from 618 * ext4_grp_locked_error() to distinguish beween the 619 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 620 * aggressively from the ext4 function in question, with a 621 * more appropriate error code. 622 */ 623 ext4_lock_group(sb, grp); 624 return; 625 } 626 627 void ext4_update_dynamic_rev(struct super_block *sb) 628 { 629 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 630 631 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 632 return; 633 634 ext4_warning(sb, 635 "updating to rev %d because of new feature flag, " 636 "running e2fsck is recommended", 637 EXT4_DYNAMIC_REV); 638 639 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 640 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 641 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 642 /* leave es->s_feature_*compat flags alone */ 643 /* es->s_uuid will be set by e2fsck if empty */ 644 645 /* 646 * The rest of the superblock fields should be zero, and if not it 647 * means they are likely already in use, so leave them alone. We 648 * can leave it up to e2fsck to clean up any inconsistencies there. 649 */ 650 } 651 652 /* 653 * Open the external journal device 654 */ 655 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 656 { 657 struct block_device *bdev; 658 char b[BDEVNAME_SIZE]; 659 660 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 661 if (IS_ERR(bdev)) 662 goto fail; 663 return bdev; 664 665 fail: 666 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 667 __bdevname(dev, b), PTR_ERR(bdev)); 668 return NULL; 669 } 670 671 /* 672 * Release the journal device 673 */ 674 static int ext4_blkdev_put(struct block_device *bdev) 675 { 676 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 677 } 678 679 static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 680 { 681 struct block_device *bdev; 682 int ret = -ENODEV; 683 684 bdev = sbi->journal_bdev; 685 if (bdev) { 686 ret = ext4_blkdev_put(bdev); 687 sbi->journal_bdev = NULL; 688 } 689 return ret; 690 } 691 692 static inline struct inode *orphan_list_entry(struct list_head *l) 693 { 694 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 695 } 696 697 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 698 { 699 struct list_head *l; 700 701 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 702 le32_to_cpu(sbi->s_es->s_last_orphan)); 703 704 printk(KERN_ERR "sb_info orphan list:\n"); 705 list_for_each(l, &sbi->s_orphan) { 706 struct inode *inode = orphan_list_entry(l); 707 printk(KERN_ERR " " 708 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 709 inode->i_sb->s_id, inode->i_ino, inode, 710 inode->i_mode, inode->i_nlink, 711 NEXT_ORPHAN(inode)); 712 } 713 } 714 715 static void ext4_put_super(struct super_block *sb) 716 { 717 struct ext4_sb_info *sbi = EXT4_SB(sb); 718 struct ext4_super_block *es = sbi->s_es; 719 int i, err; 720 721 ext4_unregister_li_request(sb); 722 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 723 724 flush_workqueue(sbi->dio_unwritten_wq); 725 destroy_workqueue(sbi->dio_unwritten_wq); 726 727 lock_super(sb); 728 if (sb->s_dirt) 729 ext4_commit_super(sb, 1); 730 731 if (sbi->s_journal) { 732 err = jbd2_journal_destroy(sbi->s_journal); 733 sbi->s_journal = NULL; 734 if (err < 0) 735 ext4_abort(sb, "Couldn't clean up the journal"); 736 } 737 738 del_timer(&sbi->s_err_report); 739 ext4_release_system_zone(sb); 740 ext4_mb_release(sb); 741 ext4_ext_release(sb); 742 ext4_xattr_put_super(sb); 743 744 if (!(sb->s_flags & MS_RDONLY)) { 745 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 746 es->s_state = cpu_to_le16(sbi->s_mount_state); 747 ext4_commit_super(sb, 1); 748 } 749 if (sbi->s_proc) { 750 remove_proc_entry(sb->s_id, ext4_proc_root); 751 } 752 kobject_del(&sbi->s_kobj); 753 754 for (i = 0; i < sbi->s_gdb_count; i++) 755 brelse(sbi->s_group_desc[i]); 756 kfree(sbi->s_group_desc); 757 if (is_vmalloc_addr(sbi->s_flex_groups)) 758 vfree(sbi->s_flex_groups); 759 else 760 kfree(sbi->s_flex_groups); 761 percpu_counter_destroy(&sbi->s_freeblocks_counter); 762 percpu_counter_destroy(&sbi->s_freeinodes_counter); 763 percpu_counter_destroy(&sbi->s_dirs_counter); 764 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 765 brelse(sbi->s_sbh); 766 #ifdef CONFIG_QUOTA 767 for (i = 0; i < MAXQUOTAS; i++) 768 kfree(sbi->s_qf_names[i]); 769 #endif 770 771 /* Debugging code just in case the in-memory inode orphan list 772 * isn't empty. The on-disk one can be non-empty if we've 773 * detected an error and taken the fs readonly, but the 774 * in-memory list had better be clean by this point. */ 775 if (!list_empty(&sbi->s_orphan)) 776 dump_orphan_list(sb, sbi); 777 J_ASSERT(list_empty(&sbi->s_orphan)); 778 779 invalidate_bdev(sb->s_bdev); 780 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 781 /* 782 * Invalidate the journal device's buffers. We don't want them 783 * floating about in memory - the physical journal device may 784 * hotswapped, and it breaks the `ro-after' testing code. 785 */ 786 sync_blockdev(sbi->journal_bdev); 787 invalidate_bdev(sbi->journal_bdev); 788 ext4_blkdev_remove(sbi); 789 } 790 sb->s_fs_info = NULL; 791 /* 792 * Now that we are completely done shutting down the 793 * superblock, we need to actually destroy the kobject. 794 */ 795 unlock_super(sb); 796 kobject_put(&sbi->s_kobj); 797 wait_for_completion(&sbi->s_kobj_unregister); 798 kfree(sbi->s_blockgroup_lock); 799 kfree(sbi); 800 } 801 802 static struct kmem_cache *ext4_inode_cachep; 803 804 /* 805 * Called inside transaction, so use GFP_NOFS 806 */ 807 static struct inode *ext4_alloc_inode(struct super_block *sb) 808 { 809 struct ext4_inode_info *ei; 810 811 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 812 if (!ei) 813 return NULL; 814 815 ei->vfs_inode.i_version = 1; 816 ei->vfs_inode.i_data.writeback_index = 0; 817 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 818 INIT_LIST_HEAD(&ei->i_prealloc_list); 819 spin_lock_init(&ei->i_prealloc_lock); 820 ei->i_reserved_data_blocks = 0; 821 ei->i_reserved_meta_blocks = 0; 822 ei->i_allocated_meta_blocks = 0; 823 ei->i_da_metadata_calc_len = 0; 824 spin_lock_init(&(ei->i_block_reservation_lock)); 825 #ifdef CONFIG_QUOTA 826 ei->i_reserved_quota = 0; 827 #endif 828 ei->jinode = NULL; 829 INIT_LIST_HEAD(&ei->i_completed_io_list); 830 spin_lock_init(&ei->i_completed_io_lock); 831 ei->cur_aio_dio = NULL; 832 ei->i_sync_tid = 0; 833 ei->i_datasync_tid = 0; 834 atomic_set(&ei->i_ioend_count, 0); 835 836 return &ei->vfs_inode; 837 } 838 839 static int ext4_drop_inode(struct inode *inode) 840 { 841 int drop = generic_drop_inode(inode); 842 843 trace_ext4_drop_inode(inode, drop); 844 return drop; 845 } 846 847 static void ext4_i_callback(struct rcu_head *head) 848 { 849 struct inode *inode = container_of(head, struct inode, i_rcu); 850 INIT_LIST_HEAD(&inode->i_dentry); 851 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 852 } 853 854 static void ext4_destroy_inode(struct inode *inode) 855 { 856 ext4_ioend_wait(inode); 857 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 858 ext4_msg(inode->i_sb, KERN_ERR, 859 "Inode %lu (%p): orphan list check failed!", 860 inode->i_ino, EXT4_I(inode)); 861 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 862 EXT4_I(inode), sizeof(struct ext4_inode_info), 863 true); 864 dump_stack(); 865 } 866 call_rcu(&inode->i_rcu, ext4_i_callback); 867 } 868 869 static void init_once(void *foo) 870 { 871 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 872 873 INIT_LIST_HEAD(&ei->i_orphan); 874 #ifdef CONFIG_EXT4_FS_XATTR 875 init_rwsem(&ei->xattr_sem); 876 #endif 877 init_rwsem(&ei->i_data_sem); 878 inode_init_once(&ei->vfs_inode); 879 } 880 881 static int init_inodecache(void) 882 { 883 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 884 sizeof(struct ext4_inode_info), 885 0, (SLAB_RECLAIM_ACCOUNT| 886 SLAB_MEM_SPREAD), 887 init_once); 888 if (ext4_inode_cachep == NULL) 889 return -ENOMEM; 890 return 0; 891 } 892 893 static void destroy_inodecache(void) 894 { 895 kmem_cache_destroy(ext4_inode_cachep); 896 } 897 898 void ext4_clear_inode(struct inode *inode) 899 { 900 invalidate_inode_buffers(inode); 901 end_writeback(inode); 902 dquot_drop(inode); 903 ext4_discard_preallocations(inode); 904 if (EXT4_I(inode)->jinode) { 905 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 906 EXT4_I(inode)->jinode); 907 jbd2_free_inode(EXT4_I(inode)->jinode); 908 EXT4_I(inode)->jinode = NULL; 909 } 910 } 911 912 static inline void ext4_show_quota_options(struct seq_file *seq, 913 struct super_block *sb) 914 { 915 #if defined(CONFIG_QUOTA) 916 struct ext4_sb_info *sbi = EXT4_SB(sb); 917 918 if (sbi->s_jquota_fmt) { 919 char *fmtname = ""; 920 921 switch (sbi->s_jquota_fmt) { 922 case QFMT_VFS_OLD: 923 fmtname = "vfsold"; 924 break; 925 case QFMT_VFS_V0: 926 fmtname = "vfsv0"; 927 break; 928 case QFMT_VFS_V1: 929 fmtname = "vfsv1"; 930 break; 931 } 932 seq_printf(seq, ",jqfmt=%s", fmtname); 933 } 934 935 if (sbi->s_qf_names[USRQUOTA]) 936 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 937 938 if (sbi->s_qf_names[GRPQUOTA]) 939 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 940 941 if (test_opt(sb, USRQUOTA)) 942 seq_puts(seq, ",usrquota"); 943 944 if (test_opt(sb, GRPQUOTA)) 945 seq_puts(seq, ",grpquota"); 946 #endif 947 } 948 949 /* 950 * Show an option if 951 * - it's set to a non-default value OR 952 * - if the per-sb default is different from the global default 953 */ 954 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 955 { 956 int def_errors; 957 unsigned long def_mount_opts; 958 struct super_block *sb = vfs->mnt_sb; 959 struct ext4_sb_info *sbi = EXT4_SB(sb); 960 struct ext4_super_block *es = sbi->s_es; 961 962 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 963 def_errors = le16_to_cpu(es->s_errors); 964 965 if (sbi->s_sb_block != 1) 966 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 967 if (test_opt(sb, MINIX_DF)) 968 seq_puts(seq, ",minixdf"); 969 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 970 seq_puts(seq, ",grpid"); 971 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 972 seq_puts(seq, ",nogrpid"); 973 if (sbi->s_resuid != EXT4_DEF_RESUID || 974 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 975 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 976 } 977 if (sbi->s_resgid != EXT4_DEF_RESGID || 978 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 979 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 980 } 981 if (test_opt(sb, ERRORS_RO)) { 982 if (def_errors == EXT4_ERRORS_PANIC || 983 def_errors == EXT4_ERRORS_CONTINUE) { 984 seq_puts(seq, ",errors=remount-ro"); 985 } 986 } 987 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 988 seq_puts(seq, ",errors=continue"); 989 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 990 seq_puts(seq, ",errors=panic"); 991 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 992 seq_puts(seq, ",nouid32"); 993 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 994 seq_puts(seq, ",debug"); 995 if (test_opt(sb, OLDALLOC)) 996 seq_puts(seq, ",oldalloc"); 997 #ifdef CONFIG_EXT4_FS_XATTR 998 if (test_opt(sb, XATTR_USER) && 999 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 1000 seq_puts(seq, ",user_xattr"); 1001 if (!test_opt(sb, XATTR_USER) && 1002 (def_mount_opts & EXT4_DEFM_XATTR_USER)) { 1003 seq_puts(seq, ",nouser_xattr"); 1004 } 1005 #endif 1006 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1007 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1008 seq_puts(seq, ",acl"); 1009 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 1010 seq_puts(seq, ",noacl"); 1011 #endif 1012 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 1013 seq_printf(seq, ",commit=%u", 1014 (unsigned) (sbi->s_commit_interval / HZ)); 1015 } 1016 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 1017 seq_printf(seq, ",min_batch_time=%u", 1018 (unsigned) sbi->s_min_batch_time); 1019 } 1020 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1021 seq_printf(seq, ",max_batch_time=%u", 1022 (unsigned) sbi->s_min_batch_time); 1023 } 1024 1025 /* 1026 * We're changing the default of barrier mount option, so 1027 * let's always display its mount state so it's clear what its 1028 * status is. 1029 */ 1030 seq_puts(seq, ",barrier="); 1031 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); 1032 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 1033 seq_puts(seq, ",journal_async_commit"); 1034 else if (test_opt(sb, JOURNAL_CHECKSUM)) 1035 seq_puts(seq, ",journal_checksum"); 1036 if (test_opt(sb, I_VERSION)) 1037 seq_puts(seq, ",i_version"); 1038 if (!test_opt(sb, DELALLOC) && 1039 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1040 seq_puts(seq, ",nodelalloc"); 1041 1042 if (test_opt(sb, MBLK_IO_SUBMIT)) 1043 seq_puts(seq, ",mblk_io_submit"); 1044 if (sbi->s_stripe) 1045 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1046 /* 1047 * journal mode get enabled in different ways 1048 * So just print the value even if we didn't specify it 1049 */ 1050 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1051 seq_puts(seq, ",data=journal"); 1052 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1053 seq_puts(seq, ",data=ordered"); 1054 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1055 seq_puts(seq, ",data=writeback"); 1056 1057 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1058 seq_printf(seq, ",inode_readahead_blks=%u", 1059 sbi->s_inode_readahead_blks); 1060 1061 if (test_opt(sb, DATA_ERR_ABORT)) 1062 seq_puts(seq, ",data_err=abort"); 1063 1064 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1065 seq_puts(seq, ",noauto_da_alloc"); 1066 1067 if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) 1068 seq_puts(seq, ",discard"); 1069 1070 if (test_opt(sb, NOLOAD)) 1071 seq_puts(seq, ",norecovery"); 1072 1073 if (test_opt(sb, DIOREAD_NOLOCK)) 1074 seq_puts(seq, ",dioread_nolock"); 1075 1076 if (test_opt(sb, BLOCK_VALIDITY) && 1077 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1078 seq_puts(seq, ",block_validity"); 1079 1080 if (!test_opt(sb, INIT_INODE_TABLE)) 1081 seq_puts(seq, ",noinit_inode_table"); 1082 else if (sbi->s_li_wait_mult) 1083 seq_printf(seq, ",init_inode_table=%u", 1084 (unsigned) sbi->s_li_wait_mult); 1085 1086 ext4_show_quota_options(seq, sb); 1087 1088 return 0; 1089 } 1090 1091 static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1092 u64 ino, u32 generation) 1093 { 1094 struct inode *inode; 1095 1096 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 1097 return ERR_PTR(-ESTALE); 1098 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 1099 return ERR_PTR(-ESTALE); 1100 1101 /* iget isn't really right if the inode is currently unallocated!! 1102 * 1103 * ext4_read_inode will return a bad_inode if the inode had been 1104 * deleted, so we should be safe. 1105 * 1106 * Currently we don't know the generation for parent directory, so 1107 * a generation of 0 means "accept any" 1108 */ 1109 inode = ext4_iget(sb, ino); 1110 if (IS_ERR(inode)) 1111 return ERR_CAST(inode); 1112 if (generation && inode->i_generation != generation) { 1113 iput(inode); 1114 return ERR_PTR(-ESTALE); 1115 } 1116 1117 return inode; 1118 } 1119 1120 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1121 int fh_len, int fh_type) 1122 { 1123 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1124 ext4_nfs_get_inode); 1125 } 1126 1127 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1128 int fh_len, int fh_type) 1129 { 1130 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1131 ext4_nfs_get_inode); 1132 } 1133 1134 /* 1135 * Try to release metadata pages (indirect blocks, directories) which are 1136 * mapped via the block device. Since these pages could have journal heads 1137 * which would prevent try_to_free_buffers() from freeing them, we must use 1138 * jbd2 layer's try_to_free_buffers() function to release them. 1139 */ 1140 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1141 gfp_t wait) 1142 { 1143 journal_t *journal = EXT4_SB(sb)->s_journal; 1144 1145 WARN_ON(PageChecked(page)); 1146 if (!page_has_buffers(page)) 1147 return 0; 1148 if (journal) 1149 return jbd2_journal_try_to_free_buffers(journal, page, 1150 wait & ~__GFP_WAIT); 1151 return try_to_free_buffers(page); 1152 } 1153 1154 #ifdef CONFIG_QUOTA 1155 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1156 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1157 1158 static int ext4_write_dquot(struct dquot *dquot); 1159 static int ext4_acquire_dquot(struct dquot *dquot); 1160 static int ext4_release_dquot(struct dquot *dquot); 1161 static int ext4_mark_dquot_dirty(struct dquot *dquot); 1162 static int ext4_write_info(struct super_block *sb, int type); 1163 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1164 struct path *path); 1165 static int ext4_quota_off(struct super_block *sb, int type); 1166 static int ext4_quota_on_mount(struct super_block *sb, int type); 1167 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1168 size_t len, loff_t off); 1169 static ssize_t ext4_quota_write(struct super_block *sb, int type, 1170 const char *data, size_t len, loff_t off); 1171 1172 static const struct dquot_operations ext4_quota_operations = { 1173 #ifdef CONFIG_QUOTA 1174 .get_reserved_space = ext4_get_reserved_space, 1175 #endif 1176 .write_dquot = ext4_write_dquot, 1177 .acquire_dquot = ext4_acquire_dquot, 1178 .release_dquot = ext4_release_dquot, 1179 .mark_dirty = ext4_mark_dquot_dirty, 1180 .write_info = ext4_write_info, 1181 .alloc_dquot = dquot_alloc, 1182 .destroy_dquot = dquot_destroy, 1183 }; 1184 1185 static const struct quotactl_ops ext4_qctl_operations = { 1186 .quota_on = ext4_quota_on, 1187 .quota_off = ext4_quota_off, 1188 .quota_sync = dquot_quota_sync, 1189 .get_info = dquot_get_dqinfo, 1190 .set_info = dquot_set_dqinfo, 1191 .get_dqblk = dquot_get_dqblk, 1192 .set_dqblk = dquot_set_dqblk 1193 }; 1194 #endif 1195 1196 static const struct super_operations ext4_sops = { 1197 .alloc_inode = ext4_alloc_inode, 1198 .destroy_inode = ext4_destroy_inode, 1199 .write_inode = ext4_write_inode, 1200 .dirty_inode = ext4_dirty_inode, 1201 .drop_inode = ext4_drop_inode, 1202 .evict_inode = ext4_evict_inode, 1203 .put_super = ext4_put_super, 1204 .sync_fs = ext4_sync_fs, 1205 .freeze_fs = ext4_freeze, 1206 .unfreeze_fs = ext4_unfreeze, 1207 .statfs = ext4_statfs, 1208 .remount_fs = ext4_remount, 1209 .show_options = ext4_show_options, 1210 #ifdef CONFIG_QUOTA 1211 .quota_read = ext4_quota_read, 1212 .quota_write = ext4_quota_write, 1213 #endif 1214 .bdev_try_to_free_page = bdev_try_to_free_page, 1215 }; 1216 1217 static const struct super_operations ext4_nojournal_sops = { 1218 .alloc_inode = ext4_alloc_inode, 1219 .destroy_inode = ext4_destroy_inode, 1220 .write_inode = ext4_write_inode, 1221 .dirty_inode = ext4_dirty_inode, 1222 .drop_inode = ext4_drop_inode, 1223 .evict_inode = ext4_evict_inode, 1224 .write_super = ext4_write_super, 1225 .put_super = ext4_put_super, 1226 .statfs = ext4_statfs, 1227 .remount_fs = ext4_remount, 1228 .show_options = ext4_show_options, 1229 #ifdef CONFIG_QUOTA 1230 .quota_read = ext4_quota_read, 1231 .quota_write = ext4_quota_write, 1232 #endif 1233 .bdev_try_to_free_page = bdev_try_to_free_page, 1234 }; 1235 1236 static const struct export_operations ext4_export_ops = { 1237 .fh_to_dentry = ext4_fh_to_dentry, 1238 .fh_to_parent = ext4_fh_to_parent, 1239 .get_parent = ext4_get_parent, 1240 }; 1241 1242 enum { 1243 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1244 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1245 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1246 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1247 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1248 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1249 Opt_journal_update, Opt_journal_dev, 1250 Opt_journal_checksum, Opt_journal_async_commit, 1251 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1252 Opt_data_err_abort, Opt_data_err_ignore, 1253 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1254 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1255 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1256 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1257 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1258 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1259 Opt_inode_readahead_blks, Opt_journal_ioprio, 1260 Opt_dioread_nolock, Opt_dioread_lock, 1261 Opt_discard, Opt_nodiscard, 1262 Opt_init_inode_table, Opt_noinit_inode_table, 1263 }; 1264 1265 static const match_table_t tokens = { 1266 {Opt_bsd_df, "bsddf"}, 1267 {Opt_minix_df, "minixdf"}, 1268 {Opt_grpid, "grpid"}, 1269 {Opt_grpid, "bsdgroups"}, 1270 {Opt_nogrpid, "nogrpid"}, 1271 {Opt_nogrpid, "sysvgroups"}, 1272 {Opt_resgid, "resgid=%u"}, 1273 {Opt_resuid, "resuid=%u"}, 1274 {Opt_sb, "sb=%u"}, 1275 {Opt_err_cont, "errors=continue"}, 1276 {Opt_err_panic, "errors=panic"}, 1277 {Opt_err_ro, "errors=remount-ro"}, 1278 {Opt_nouid32, "nouid32"}, 1279 {Opt_debug, "debug"}, 1280 {Opt_oldalloc, "oldalloc"}, 1281 {Opt_orlov, "orlov"}, 1282 {Opt_user_xattr, "user_xattr"}, 1283 {Opt_nouser_xattr, "nouser_xattr"}, 1284 {Opt_acl, "acl"}, 1285 {Opt_noacl, "noacl"}, 1286 {Opt_noload, "noload"}, 1287 {Opt_noload, "norecovery"}, 1288 {Opt_nobh, "nobh"}, 1289 {Opt_bh, "bh"}, 1290 {Opt_commit, "commit=%u"}, 1291 {Opt_min_batch_time, "min_batch_time=%u"}, 1292 {Opt_max_batch_time, "max_batch_time=%u"}, 1293 {Opt_journal_update, "journal=update"}, 1294 {Opt_journal_dev, "journal_dev=%u"}, 1295 {Opt_journal_checksum, "journal_checksum"}, 1296 {Opt_journal_async_commit, "journal_async_commit"}, 1297 {Opt_abort, "abort"}, 1298 {Opt_data_journal, "data=journal"}, 1299 {Opt_data_ordered, "data=ordered"}, 1300 {Opt_data_writeback, "data=writeback"}, 1301 {Opt_data_err_abort, "data_err=abort"}, 1302 {Opt_data_err_ignore, "data_err=ignore"}, 1303 {Opt_offusrjquota, "usrjquota="}, 1304 {Opt_usrjquota, "usrjquota=%s"}, 1305 {Opt_offgrpjquota, "grpjquota="}, 1306 {Opt_grpjquota, "grpjquota=%s"}, 1307 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1308 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1309 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1310 {Opt_grpquota, "grpquota"}, 1311 {Opt_noquota, "noquota"}, 1312 {Opt_quota, "quota"}, 1313 {Opt_usrquota, "usrquota"}, 1314 {Opt_barrier, "barrier=%u"}, 1315 {Opt_barrier, "barrier"}, 1316 {Opt_nobarrier, "nobarrier"}, 1317 {Opt_i_version, "i_version"}, 1318 {Opt_stripe, "stripe=%u"}, 1319 {Opt_resize, "resize"}, 1320 {Opt_delalloc, "delalloc"}, 1321 {Opt_nodelalloc, "nodelalloc"}, 1322 {Opt_mblk_io_submit, "mblk_io_submit"}, 1323 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1324 {Opt_block_validity, "block_validity"}, 1325 {Opt_noblock_validity, "noblock_validity"}, 1326 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1327 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1328 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1329 {Opt_auto_da_alloc, "auto_da_alloc"}, 1330 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1331 {Opt_dioread_nolock, "dioread_nolock"}, 1332 {Opt_dioread_lock, "dioread_lock"}, 1333 {Opt_discard, "discard"}, 1334 {Opt_nodiscard, "nodiscard"}, 1335 {Opt_init_inode_table, "init_itable=%u"}, 1336 {Opt_init_inode_table, "init_itable"}, 1337 {Opt_noinit_inode_table, "noinit_itable"}, 1338 {Opt_err, NULL}, 1339 }; 1340 1341 static ext4_fsblk_t get_sb_block(void **data) 1342 { 1343 ext4_fsblk_t sb_block; 1344 char *options = (char *) *data; 1345 1346 if (!options || strncmp(options, "sb=", 3) != 0) 1347 return 1; /* Default location */ 1348 1349 options += 3; 1350 /* TODO: use simple_strtoll with >32bit ext4 */ 1351 sb_block = simple_strtoul(options, &options, 0); 1352 if (*options && *options != ',') { 1353 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1354 (char *) *data); 1355 return 1; 1356 } 1357 if (*options == ',') 1358 options++; 1359 *data = (void *) options; 1360 1361 return sb_block; 1362 } 1363 1364 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1365 static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1366 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1367 1368 #ifdef CONFIG_QUOTA 1369 static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1370 { 1371 struct ext4_sb_info *sbi = EXT4_SB(sb); 1372 char *qname; 1373 1374 if (sb_any_quota_loaded(sb) && 1375 !sbi->s_qf_names[qtype]) { 1376 ext4_msg(sb, KERN_ERR, 1377 "Cannot change journaled " 1378 "quota options when quota turned on"); 1379 return 0; 1380 } 1381 qname = match_strdup(args); 1382 if (!qname) { 1383 ext4_msg(sb, KERN_ERR, 1384 "Not enough memory for storing quotafile name"); 1385 return 0; 1386 } 1387 if (sbi->s_qf_names[qtype] && 1388 strcmp(sbi->s_qf_names[qtype], qname)) { 1389 ext4_msg(sb, KERN_ERR, 1390 "%s quota file already specified", QTYPE2NAME(qtype)); 1391 kfree(qname); 1392 return 0; 1393 } 1394 sbi->s_qf_names[qtype] = qname; 1395 if (strchr(sbi->s_qf_names[qtype], '/')) { 1396 ext4_msg(sb, KERN_ERR, 1397 "quotafile must be on filesystem root"); 1398 kfree(sbi->s_qf_names[qtype]); 1399 sbi->s_qf_names[qtype] = NULL; 1400 return 0; 1401 } 1402 set_opt(sb, QUOTA); 1403 return 1; 1404 } 1405 1406 static int clear_qf_name(struct super_block *sb, int qtype) 1407 { 1408 1409 struct ext4_sb_info *sbi = EXT4_SB(sb); 1410 1411 if (sb_any_quota_loaded(sb) && 1412 sbi->s_qf_names[qtype]) { 1413 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1414 " when quota turned on"); 1415 return 0; 1416 } 1417 /* 1418 * The space will be released later when all options are confirmed 1419 * to be correct 1420 */ 1421 sbi->s_qf_names[qtype] = NULL; 1422 return 1; 1423 } 1424 #endif 1425 1426 static int parse_options(char *options, struct super_block *sb, 1427 unsigned long *journal_devnum, 1428 unsigned int *journal_ioprio, 1429 ext4_fsblk_t *n_blocks_count, int is_remount) 1430 { 1431 struct ext4_sb_info *sbi = EXT4_SB(sb); 1432 char *p; 1433 substring_t args[MAX_OPT_ARGS]; 1434 int data_opt = 0; 1435 int option; 1436 #ifdef CONFIG_QUOTA 1437 int qfmt; 1438 #endif 1439 1440 if (!options) 1441 return 1; 1442 1443 while ((p = strsep(&options, ",")) != NULL) { 1444 int token; 1445 if (!*p) 1446 continue; 1447 1448 /* 1449 * Initialize args struct so we know whether arg was 1450 * found; some options take optional arguments. 1451 */ 1452 args[0].to = args[0].from = 0; 1453 token = match_token(p, tokens, args); 1454 switch (token) { 1455 case Opt_bsd_df: 1456 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1457 clear_opt(sb, MINIX_DF); 1458 break; 1459 case Opt_minix_df: 1460 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1461 set_opt(sb, MINIX_DF); 1462 1463 break; 1464 case Opt_grpid: 1465 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1466 set_opt(sb, GRPID); 1467 1468 break; 1469 case Opt_nogrpid: 1470 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1471 clear_opt(sb, GRPID); 1472 1473 break; 1474 case Opt_resuid: 1475 if (match_int(&args[0], &option)) 1476 return 0; 1477 sbi->s_resuid = option; 1478 break; 1479 case Opt_resgid: 1480 if (match_int(&args[0], &option)) 1481 return 0; 1482 sbi->s_resgid = option; 1483 break; 1484 case Opt_sb: 1485 /* handled by get_sb_block() instead of here */ 1486 /* *sb_block = match_int(&args[0]); */ 1487 break; 1488 case Opt_err_panic: 1489 clear_opt(sb, ERRORS_CONT); 1490 clear_opt(sb, ERRORS_RO); 1491 set_opt(sb, ERRORS_PANIC); 1492 break; 1493 case Opt_err_ro: 1494 clear_opt(sb, ERRORS_CONT); 1495 clear_opt(sb, ERRORS_PANIC); 1496 set_opt(sb, ERRORS_RO); 1497 break; 1498 case Opt_err_cont: 1499 clear_opt(sb, ERRORS_RO); 1500 clear_opt(sb, ERRORS_PANIC); 1501 set_opt(sb, ERRORS_CONT); 1502 break; 1503 case Opt_nouid32: 1504 set_opt(sb, NO_UID32); 1505 break; 1506 case Opt_debug: 1507 set_opt(sb, DEBUG); 1508 break; 1509 case Opt_oldalloc: 1510 set_opt(sb, OLDALLOC); 1511 break; 1512 case Opt_orlov: 1513 clear_opt(sb, OLDALLOC); 1514 break; 1515 #ifdef CONFIG_EXT4_FS_XATTR 1516 case Opt_user_xattr: 1517 set_opt(sb, XATTR_USER); 1518 break; 1519 case Opt_nouser_xattr: 1520 clear_opt(sb, XATTR_USER); 1521 break; 1522 #else 1523 case Opt_user_xattr: 1524 case Opt_nouser_xattr: 1525 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1526 break; 1527 #endif 1528 #ifdef CONFIG_EXT4_FS_POSIX_ACL 1529 case Opt_acl: 1530 set_opt(sb, POSIX_ACL); 1531 break; 1532 case Opt_noacl: 1533 clear_opt(sb, POSIX_ACL); 1534 break; 1535 #else 1536 case Opt_acl: 1537 case Opt_noacl: 1538 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1539 break; 1540 #endif 1541 case Opt_journal_update: 1542 /* @@@ FIXME */ 1543 /* Eventually we will want to be able to create 1544 a journal file here. For now, only allow the 1545 user to specify an existing inode to be the 1546 journal file. */ 1547 if (is_remount) { 1548 ext4_msg(sb, KERN_ERR, 1549 "Cannot specify journal on remount"); 1550 return 0; 1551 } 1552 set_opt(sb, UPDATE_JOURNAL); 1553 break; 1554 case Opt_journal_dev: 1555 if (is_remount) { 1556 ext4_msg(sb, KERN_ERR, 1557 "Cannot specify journal on remount"); 1558 return 0; 1559 } 1560 if (match_int(&args[0], &option)) 1561 return 0; 1562 *journal_devnum = option; 1563 break; 1564 case Opt_journal_checksum: 1565 set_opt(sb, JOURNAL_CHECKSUM); 1566 break; 1567 case Opt_journal_async_commit: 1568 set_opt(sb, JOURNAL_ASYNC_COMMIT); 1569 set_opt(sb, JOURNAL_CHECKSUM); 1570 break; 1571 case Opt_noload: 1572 set_opt(sb, NOLOAD); 1573 break; 1574 case Opt_commit: 1575 if (match_int(&args[0], &option)) 1576 return 0; 1577 if (option < 0) 1578 return 0; 1579 if (option == 0) 1580 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1581 sbi->s_commit_interval = HZ * option; 1582 break; 1583 case Opt_max_batch_time: 1584 if (match_int(&args[0], &option)) 1585 return 0; 1586 if (option < 0) 1587 return 0; 1588 if (option == 0) 1589 option = EXT4_DEF_MAX_BATCH_TIME; 1590 sbi->s_max_batch_time = option; 1591 break; 1592 case Opt_min_batch_time: 1593 if (match_int(&args[0], &option)) 1594 return 0; 1595 if (option < 0) 1596 return 0; 1597 sbi->s_min_batch_time = option; 1598 break; 1599 case Opt_data_journal: 1600 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1601 goto datacheck; 1602 case Opt_data_ordered: 1603 data_opt = EXT4_MOUNT_ORDERED_DATA; 1604 goto datacheck; 1605 case Opt_data_writeback: 1606 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1607 datacheck: 1608 if (is_remount) { 1609 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1610 ext4_msg(sb, KERN_ERR, 1611 "Cannot change data mode on remount"); 1612 return 0; 1613 } 1614 } else { 1615 clear_opt(sb, DATA_FLAGS); 1616 sbi->s_mount_opt |= data_opt; 1617 } 1618 break; 1619 case Opt_data_err_abort: 1620 set_opt(sb, DATA_ERR_ABORT); 1621 break; 1622 case Opt_data_err_ignore: 1623 clear_opt(sb, DATA_ERR_ABORT); 1624 break; 1625 #ifdef CONFIG_QUOTA 1626 case Opt_usrjquota: 1627 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1628 return 0; 1629 break; 1630 case Opt_grpjquota: 1631 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1632 return 0; 1633 break; 1634 case Opt_offusrjquota: 1635 if (!clear_qf_name(sb, USRQUOTA)) 1636 return 0; 1637 break; 1638 case Opt_offgrpjquota: 1639 if (!clear_qf_name(sb, GRPQUOTA)) 1640 return 0; 1641 break; 1642 1643 case Opt_jqfmt_vfsold: 1644 qfmt = QFMT_VFS_OLD; 1645 goto set_qf_format; 1646 case Opt_jqfmt_vfsv0: 1647 qfmt = QFMT_VFS_V0; 1648 goto set_qf_format; 1649 case Opt_jqfmt_vfsv1: 1650 qfmt = QFMT_VFS_V1; 1651 set_qf_format: 1652 if (sb_any_quota_loaded(sb) && 1653 sbi->s_jquota_fmt != qfmt) { 1654 ext4_msg(sb, KERN_ERR, "Cannot change " 1655 "journaled quota options when " 1656 "quota turned on"); 1657 return 0; 1658 } 1659 sbi->s_jquota_fmt = qfmt; 1660 break; 1661 case Opt_quota: 1662 case Opt_usrquota: 1663 set_opt(sb, QUOTA); 1664 set_opt(sb, USRQUOTA); 1665 break; 1666 case Opt_grpquota: 1667 set_opt(sb, QUOTA); 1668 set_opt(sb, GRPQUOTA); 1669 break; 1670 case Opt_noquota: 1671 if (sb_any_quota_loaded(sb)) { 1672 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1673 "options when quota turned on"); 1674 return 0; 1675 } 1676 clear_opt(sb, QUOTA); 1677 clear_opt(sb, USRQUOTA); 1678 clear_opt(sb, GRPQUOTA); 1679 break; 1680 #else 1681 case Opt_quota: 1682 case Opt_usrquota: 1683 case Opt_grpquota: 1684 ext4_msg(sb, KERN_ERR, 1685 "quota options not supported"); 1686 break; 1687 case Opt_usrjquota: 1688 case Opt_grpjquota: 1689 case Opt_offusrjquota: 1690 case Opt_offgrpjquota: 1691 case Opt_jqfmt_vfsold: 1692 case Opt_jqfmt_vfsv0: 1693 case Opt_jqfmt_vfsv1: 1694 ext4_msg(sb, KERN_ERR, 1695 "journaled quota options not supported"); 1696 break; 1697 case Opt_noquota: 1698 break; 1699 #endif 1700 case Opt_abort: 1701 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1702 break; 1703 case Opt_nobarrier: 1704 clear_opt(sb, BARRIER); 1705 break; 1706 case Opt_barrier: 1707 if (args[0].from) { 1708 if (match_int(&args[0], &option)) 1709 return 0; 1710 } else 1711 option = 1; /* No argument, default to 1 */ 1712 if (option) 1713 set_opt(sb, BARRIER); 1714 else 1715 clear_opt(sb, BARRIER); 1716 break; 1717 case Opt_ignore: 1718 break; 1719 case Opt_resize: 1720 if (!is_remount) { 1721 ext4_msg(sb, KERN_ERR, 1722 "resize option only available " 1723 "for remount"); 1724 return 0; 1725 } 1726 if (match_int(&args[0], &option) != 0) 1727 return 0; 1728 *n_blocks_count = option; 1729 break; 1730 case Opt_nobh: 1731 ext4_msg(sb, KERN_WARNING, 1732 "Ignoring deprecated nobh option"); 1733 break; 1734 case Opt_bh: 1735 ext4_msg(sb, KERN_WARNING, 1736 "Ignoring deprecated bh option"); 1737 break; 1738 case Opt_i_version: 1739 set_opt(sb, I_VERSION); 1740 sb->s_flags |= MS_I_VERSION; 1741 break; 1742 case Opt_nodelalloc: 1743 clear_opt(sb, DELALLOC); 1744 break; 1745 case Opt_mblk_io_submit: 1746 set_opt(sb, MBLK_IO_SUBMIT); 1747 break; 1748 case Opt_nomblk_io_submit: 1749 clear_opt(sb, MBLK_IO_SUBMIT); 1750 break; 1751 case Opt_stripe: 1752 if (match_int(&args[0], &option)) 1753 return 0; 1754 if (option < 0) 1755 return 0; 1756 sbi->s_stripe = option; 1757 break; 1758 case Opt_delalloc: 1759 set_opt(sb, DELALLOC); 1760 break; 1761 case Opt_block_validity: 1762 set_opt(sb, BLOCK_VALIDITY); 1763 break; 1764 case Opt_noblock_validity: 1765 clear_opt(sb, BLOCK_VALIDITY); 1766 break; 1767 case Opt_inode_readahead_blks: 1768 if (match_int(&args[0], &option)) 1769 return 0; 1770 if (option < 0 || option > (1 << 30)) 1771 return 0; 1772 if (!is_power_of_2(option)) { 1773 ext4_msg(sb, KERN_ERR, 1774 "EXT4-fs: inode_readahead_blks" 1775 " must be a power of 2"); 1776 return 0; 1777 } 1778 sbi->s_inode_readahead_blks = option; 1779 break; 1780 case Opt_journal_ioprio: 1781 if (match_int(&args[0], &option)) 1782 return 0; 1783 if (option < 0 || option > 7) 1784 break; 1785 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1786 option); 1787 break; 1788 case Opt_noauto_da_alloc: 1789 set_opt(sb, NO_AUTO_DA_ALLOC); 1790 break; 1791 case Opt_auto_da_alloc: 1792 if (args[0].from) { 1793 if (match_int(&args[0], &option)) 1794 return 0; 1795 } else 1796 option = 1; /* No argument, default to 1 */ 1797 if (option) 1798 clear_opt(sb, NO_AUTO_DA_ALLOC); 1799 else 1800 set_opt(sb,NO_AUTO_DA_ALLOC); 1801 break; 1802 case Opt_discard: 1803 set_opt(sb, DISCARD); 1804 break; 1805 case Opt_nodiscard: 1806 clear_opt(sb, DISCARD); 1807 break; 1808 case Opt_dioread_nolock: 1809 set_opt(sb, DIOREAD_NOLOCK); 1810 break; 1811 case Opt_dioread_lock: 1812 clear_opt(sb, DIOREAD_NOLOCK); 1813 break; 1814 case Opt_init_inode_table: 1815 set_opt(sb, INIT_INODE_TABLE); 1816 if (args[0].from) { 1817 if (match_int(&args[0], &option)) 1818 return 0; 1819 } else 1820 option = EXT4_DEF_LI_WAIT_MULT; 1821 if (option < 0) 1822 return 0; 1823 sbi->s_li_wait_mult = option; 1824 break; 1825 case Opt_noinit_inode_table: 1826 clear_opt(sb, INIT_INODE_TABLE); 1827 break; 1828 default: 1829 ext4_msg(sb, KERN_ERR, 1830 "Unrecognized mount option \"%s\" " 1831 "or missing value", p); 1832 return 0; 1833 } 1834 } 1835 #ifdef CONFIG_QUOTA 1836 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1837 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1838 clear_opt(sb, USRQUOTA); 1839 1840 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1841 clear_opt(sb, GRPQUOTA); 1842 1843 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1844 ext4_msg(sb, KERN_ERR, "old and new quota " 1845 "format mixing"); 1846 return 0; 1847 } 1848 1849 if (!sbi->s_jquota_fmt) { 1850 ext4_msg(sb, KERN_ERR, "journaled quota format " 1851 "not specified"); 1852 return 0; 1853 } 1854 } else { 1855 if (sbi->s_jquota_fmt) { 1856 ext4_msg(sb, KERN_ERR, "journaled quota format " 1857 "specified with no journaling " 1858 "enabled"); 1859 return 0; 1860 } 1861 } 1862 #endif 1863 return 1; 1864 } 1865 1866 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1867 int read_only) 1868 { 1869 struct ext4_sb_info *sbi = EXT4_SB(sb); 1870 int res = 0; 1871 1872 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1873 ext4_msg(sb, KERN_ERR, "revision level too high, " 1874 "forcing read-only mode"); 1875 res = MS_RDONLY; 1876 } 1877 if (read_only) 1878 return res; 1879 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1880 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1881 "running e2fsck is recommended"); 1882 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1883 ext4_msg(sb, KERN_WARNING, 1884 "warning: mounting fs with errors, " 1885 "running e2fsck is recommended"); 1886 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1887 le16_to_cpu(es->s_mnt_count) >= 1888 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1889 ext4_msg(sb, KERN_WARNING, 1890 "warning: maximal mount count reached, " 1891 "running e2fsck is recommended"); 1892 else if (le32_to_cpu(es->s_checkinterval) && 1893 (le32_to_cpu(es->s_lastcheck) + 1894 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1895 ext4_msg(sb, KERN_WARNING, 1896 "warning: checktime reached, " 1897 "running e2fsck is recommended"); 1898 if (!sbi->s_journal) 1899 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1900 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1901 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1902 le16_add_cpu(&es->s_mnt_count, 1); 1903 es->s_mtime = cpu_to_le32(get_seconds()); 1904 ext4_update_dynamic_rev(sb); 1905 if (sbi->s_journal) 1906 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1907 1908 ext4_commit_super(sb, 1); 1909 if (test_opt(sb, DEBUG)) 1910 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1911 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1912 sb->s_blocksize, 1913 sbi->s_groups_count, 1914 EXT4_BLOCKS_PER_GROUP(sb), 1915 EXT4_INODES_PER_GROUP(sb), 1916 sbi->s_mount_opt, sbi->s_mount_opt2); 1917 1918 return res; 1919 } 1920 1921 static int ext4_fill_flex_info(struct super_block *sb) 1922 { 1923 struct ext4_sb_info *sbi = EXT4_SB(sb); 1924 struct ext4_group_desc *gdp = NULL; 1925 ext4_group_t flex_group_count; 1926 ext4_group_t flex_group; 1927 int groups_per_flex = 0; 1928 size_t size; 1929 int i; 1930 1931 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1932 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1933 1934 if (groups_per_flex < 2) { 1935 sbi->s_log_groups_per_flex = 0; 1936 return 1; 1937 } 1938 1939 /* We allocate both existing and potentially added groups */ 1940 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1941 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1942 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 1943 size = flex_group_count * sizeof(struct flex_groups); 1944 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 1945 if (sbi->s_flex_groups == NULL) { 1946 sbi->s_flex_groups = vzalloc(size); 1947 if (sbi->s_flex_groups == NULL) { 1948 ext4_msg(sb, KERN_ERR, 1949 "not enough memory for %u flex groups", 1950 flex_group_count); 1951 goto failed; 1952 } 1953 } 1954 1955 for (i = 0; i < sbi->s_groups_count; i++) { 1956 gdp = ext4_get_group_desc(sb, i, NULL); 1957 1958 flex_group = ext4_flex_group(sbi, i); 1959 atomic_add(ext4_free_inodes_count(sb, gdp), 1960 &sbi->s_flex_groups[flex_group].free_inodes); 1961 atomic_add(ext4_free_blks_count(sb, gdp), 1962 &sbi->s_flex_groups[flex_group].free_blocks); 1963 atomic_add(ext4_used_dirs_count(sb, gdp), 1964 &sbi->s_flex_groups[flex_group].used_dirs); 1965 } 1966 1967 return 1; 1968 failed: 1969 return 0; 1970 } 1971 1972 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1973 struct ext4_group_desc *gdp) 1974 { 1975 __u16 crc = 0; 1976 1977 if (sbi->s_es->s_feature_ro_compat & 1978 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1979 int offset = offsetof(struct ext4_group_desc, bg_checksum); 1980 __le32 le_group = cpu_to_le32(block_group); 1981 1982 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1983 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1984 crc = crc16(crc, (__u8 *)gdp, offset); 1985 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1986 /* for checksum of struct ext4_group_desc do the rest...*/ 1987 if ((sbi->s_es->s_feature_incompat & 1988 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1989 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1990 crc = crc16(crc, (__u8 *)gdp + offset, 1991 le16_to_cpu(sbi->s_es->s_desc_size) - 1992 offset); 1993 } 1994 1995 return cpu_to_le16(crc); 1996 } 1997 1998 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1999 struct ext4_group_desc *gdp) 2000 { 2001 if ((sbi->s_es->s_feature_ro_compat & 2002 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 2003 (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 2004 return 0; 2005 2006 return 1; 2007 } 2008 2009 /* Called at mount-time, super-block is locked */ 2010 static int ext4_check_descriptors(struct super_block *sb, 2011 ext4_group_t *first_not_zeroed) 2012 { 2013 struct ext4_sb_info *sbi = EXT4_SB(sb); 2014 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 2015 ext4_fsblk_t last_block; 2016 ext4_fsblk_t block_bitmap; 2017 ext4_fsblk_t inode_bitmap; 2018 ext4_fsblk_t inode_table; 2019 int flexbg_flag = 0; 2020 ext4_group_t i, grp = sbi->s_groups_count; 2021 2022 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2023 flexbg_flag = 1; 2024 2025 ext4_debug("Checking group descriptors"); 2026 2027 for (i = 0; i < sbi->s_groups_count; i++) { 2028 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 2029 2030 if (i == sbi->s_groups_count - 1 || flexbg_flag) 2031 last_block = ext4_blocks_count(sbi->s_es) - 1; 2032 else 2033 last_block = first_block + 2034 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2035 2036 if ((grp == sbi->s_groups_count) && 2037 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2038 grp = i; 2039 2040 block_bitmap = ext4_block_bitmap(sb, gdp); 2041 if (block_bitmap < first_block || block_bitmap > last_block) { 2042 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2043 "Block bitmap for group %u not in group " 2044 "(block %llu)!", i, block_bitmap); 2045 return 0; 2046 } 2047 inode_bitmap = ext4_inode_bitmap(sb, gdp); 2048 if (inode_bitmap < first_block || inode_bitmap > last_block) { 2049 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2050 "Inode bitmap for group %u not in group " 2051 "(block %llu)!", i, inode_bitmap); 2052 return 0; 2053 } 2054 inode_table = ext4_inode_table(sb, gdp); 2055 if (inode_table < first_block || 2056 inode_table + sbi->s_itb_per_group - 1 > last_block) { 2057 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2058 "Inode table for group %u not in group " 2059 "(block %llu)!", i, inode_table); 2060 return 0; 2061 } 2062 ext4_lock_group(sb, i); 2063 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 2064 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2065 "Checksum for group %u failed (%u!=%u)", 2066 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 2067 gdp)), le16_to_cpu(gdp->bg_checksum)); 2068 if (!(sb->s_flags & MS_RDONLY)) { 2069 ext4_unlock_group(sb, i); 2070 return 0; 2071 } 2072 } 2073 ext4_unlock_group(sb, i); 2074 if (!flexbg_flag) 2075 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2076 } 2077 if (NULL != first_not_zeroed) 2078 *first_not_zeroed = grp; 2079 2080 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2081 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2082 return 1; 2083 } 2084 2085 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 2086 * the superblock) which were deleted from all directories, but held open by 2087 * a process at the time of a crash. We walk the list and try to delete these 2088 * inodes at recovery time (only with a read-write filesystem). 2089 * 2090 * In order to keep the orphan inode chain consistent during traversal (in 2091 * case of crash during recovery), we link each inode into the superblock 2092 * orphan list_head and handle it the same way as an inode deletion during 2093 * normal operation (which journals the operations for us). 2094 * 2095 * We only do an iget() and an iput() on each inode, which is very safe if we 2096 * accidentally point at an in-use or already deleted inode. The worst that 2097 * can happen in this case is that we get a "bit already cleared" message from 2098 * ext4_free_inode(). The only reason we would point at a wrong inode is if 2099 * e2fsck was run on this filesystem, and it must have already done the orphan 2100 * inode cleanup for us, so we can safely abort without any further action. 2101 */ 2102 static void ext4_orphan_cleanup(struct super_block *sb, 2103 struct ext4_super_block *es) 2104 { 2105 unsigned int s_flags = sb->s_flags; 2106 int nr_orphans = 0, nr_truncates = 0; 2107 #ifdef CONFIG_QUOTA 2108 int i; 2109 #endif 2110 if (!es->s_last_orphan) { 2111 jbd_debug(4, "no orphan inodes to clean up\n"); 2112 return; 2113 } 2114 2115 if (bdev_read_only(sb->s_bdev)) { 2116 ext4_msg(sb, KERN_ERR, "write access " 2117 "unavailable, skipping orphan cleanup"); 2118 return; 2119 } 2120 2121 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2122 if (es->s_last_orphan) 2123 jbd_debug(1, "Errors on filesystem, " 2124 "clearing orphan list.\n"); 2125 es->s_last_orphan = 0; 2126 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2127 return; 2128 } 2129 2130 if (s_flags & MS_RDONLY) { 2131 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2132 sb->s_flags &= ~MS_RDONLY; 2133 } 2134 #ifdef CONFIG_QUOTA 2135 /* Needed for iput() to work correctly and not trash data */ 2136 sb->s_flags |= MS_ACTIVE; 2137 /* Turn on quotas so that they are updated correctly */ 2138 for (i = 0; i < MAXQUOTAS; i++) { 2139 if (EXT4_SB(sb)->s_qf_names[i]) { 2140 int ret = ext4_quota_on_mount(sb, i); 2141 if (ret < 0) 2142 ext4_msg(sb, KERN_ERR, 2143 "Cannot turn on journaled " 2144 "quota: error %d", ret); 2145 } 2146 } 2147 #endif 2148 2149 while (es->s_last_orphan) { 2150 struct inode *inode; 2151 2152 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2153 if (IS_ERR(inode)) { 2154 es->s_last_orphan = 0; 2155 break; 2156 } 2157 2158 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2159 dquot_initialize(inode); 2160 if (inode->i_nlink) { 2161 ext4_msg(sb, KERN_DEBUG, 2162 "%s: truncating inode %lu to %lld bytes", 2163 __func__, inode->i_ino, inode->i_size); 2164 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2165 inode->i_ino, inode->i_size); 2166 ext4_truncate(inode); 2167 nr_truncates++; 2168 } else { 2169 ext4_msg(sb, KERN_DEBUG, 2170 "%s: deleting unreferenced inode %lu", 2171 __func__, inode->i_ino); 2172 jbd_debug(2, "deleting unreferenced inode %lu\n", 2173 inode->i_ino); 2174 nr_orphans++; 2175 } 2176 iput(inode); /* The delete magic happens here! */ 2177 } 2178 2179 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2180 2181 if (nr_orphans) 2182 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2183 PLURAL(nr_orphans)); 2184 if (nr_truncates) 2185 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2186 PLURAL(nr_truncates)); 2187 #ifdef CONFIG_QUOTA 2188 /* Turn quotas off */ 2189 for (i = 0; i < MAXQUOTAS; i++) { 2190 if (sb_dqopt(sb)->files[i]) 2191 dquot_quota_off(sb, i); 2192 } 2193 #endif 2194 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2195 } 2196 2197 /* 2198 * Maximal extent format file size. 2199 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2200 * extent format containers, within a sector_t, and within i_blocks 2201 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, 2202 * so that won't be a limiting factor. 2203 * 2204 * Note, this does *not* consider any metadata overhead for vfs i_blocks. 2205 */ 2206 static loff_t ext4_max_size(int blkbits, int has_huge_files) 2207 { 2208 loff_t res; 2209 loff_t upper_limit = MAX_LFS_FILESIZE; 2210 2211 /* small i_blocks in vfs inode? */ 2212 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2213 /* 2214 * CONFIG_LBDAF is not enabled implies the inode 2215 * i_block represent total blocks in 512 bytes 2216 * 32 == size of vfs inode i_blocks * 8 2217 */ 2218 upper_limit = (1LL << 32) - 1; 2219 2220 /* total blocks in file system block size */ 2221 upper_limit >>= (blkbits - 9); 2222 upper_limit <<= blkbits; 2223 } 2224 2225 /* 32-bit extent-start container, ee_block */ 2226 res = 1LL << 32; 2227 res <<= blkbits; 2228 res -= 1; 2229 2230 /* Sanity check against vm- & vfs- imposed limits */ 2231 if (res > upper_limit) 2232 res = upper_limit; 2233 2234 return res; 2235 } 2236 2237 /* 2238 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect 2239 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. 2240 * We need to be 1 filesystem block less than the 2^48 sector limit. 2241 */ 2242 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) 2243 { 2244 loff_t res = EXT4_NDIR_BLOCKS; 2245 int meta_blocks; 2246 loff_t upper_limit; 2247 /* This is calculated to be the largest file size for a dense, block 2248 * mapped file such that the file's total number of 512-byte sectors, 2249 * including data and all indirect blocks, does not exceed (2^48 - 1). 2250 * 2251 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total 2252 * number of 512-byte sectors of the file. 2253 */ 2254 2255 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2256 /* 2257 * !has_huge_files or CONFIG_LBDAF not enabled implies that 2258 * the inode i_block field represents total file blocks in 2259 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2260 */ 2261 upper_limit = (1LL << 32) - 1; 2262 2263 /* total blocks in file system block size */ 2264 upper_limit >>= (bits - 9); 2265 2266 } else { 2267 /* 2268 * We use 48 bit ext4_inode i_blocks 2269 * With EXT4_HUGE_FILE_FL set the i_blocks 2270 * represent total number of blocks in 2271 * file system block size 2272 */ 2273 upper_limit = (1LL << 48) - 1; 2274 2275 } 2276 2277 /* indirect blocks */ 2278 meta_blocks = 1; 2279 /* double indirect blocks */ 2280 meta_blocks += 1 + (1LL << (bits-2)); 2281 /* tripple indirect blocks */ 2282 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 2283 2284 upper_limit -= meta_blocks; 2285 upper_limit <<= bits; 2286 2287 res += 1LL << (bits-2); 2288 res += 1LL << (2*(bits-2)); 2289 res += 1LL << (3*(bits-2)); 2290 res <<= bits; 2291 if (res > upper_limit) 2292 res = upper_limit; 2293 2294 if (res > MAX_LFS_FILESIZE) 2295 res = MAX_LFS_FILESIZE; 2296 2297 return res; 2298 } 2299 2300 static ext4_fsblk_t descriptor_loc(struct super_block *sb, 2301 ext4_fsblk_t logical_sb_block, int nr) 2302 { 2303 struct ext4_sb_info *sbi = EXT4_SB(sb); 2304 ext4_group_t bg, first_meta_bg; 2305 int has_super = 0; 2306 2307 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 2308 2309 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 2310 nr < first_meta_bg) 2311 return logical_sb_block + nr + 1; 2312 bg = sbi->s_desc_per_block * nr; 2313 if (ext4_bg_has_super(sb, bg)) 2314 has_super = 1; 2315 2316 return (has_super + ext4_group_first_block_no(sb, bg)); 2317 } 2318 2319 /** 2320 * ext4_get_stripe_size: Get the stripe size. 2321 * @sbi: In memory super block info 2322 * 2323 * If we have specified it via mount option, then 2324 * use the mount option value. If the value specified at mount time is 2325 * greater than the blocks per group use the super block value. 2326 * If the super block value is greater than blocks per group return 0. 2327 * Allocator needs it be less than blocks per group. 2328 * 2329 */ 2330 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) 2331 { 2332 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2333 unsigned long stripe_width = 2334 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2335 2336 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2337 return sbi->s_stripe; 2338 2339 if (stripe_width <= sbi->s_blocks_per_group) 2340 return stripe_width; 2341 2342 if (stride <= sbi->s_blocks_per_group) 2343 return stride; 2344 2345 return 0; 2346 } 2347 2348 /* sysfs supprt */ 2349 2350 struct ext4_attr { 2351 struct attribute attr; 2352 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); 2353 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 2354 const char *, size_t); 2355 int offset; 2356 }; 2357 2358 static int parse_strtoul(const char *buf, 2359 unsigned long max, unsigned long *value) 2360 { 2361 char *endp; 2362 2363 *value = simple_strtoul(skip_spaces(buf), &endp, 0); 2364 endp = skip_spaces(endp); 2365 if (*endp || *value > max) 2366 return -EINVAL; 2367 2368 return 0; 2369 } 2370 2371 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, 2372 struct ext4_sb_info *sbi, 2373 char *buf) 2374 { 2375 return snprintf(buf, PAGE_SIZE, "%llu\n", 2376 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2377 } 2378 2379 static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2380 struct ext4_sb_info *sbi, char *buf) 2381 { 2382 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2383 2384 if (!sb->s_bdev->bd_part) 2385 return snprintf(buf, PAGE_SIZE, "0\n"); 2386 return snprintf(buf, PAGE_SIZE, "%lu\n", 2387 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2388 sbi->s_sectors_written_start) >> 1); 2389 } 2390 2391 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, 2392 struct ext4_sb_info *sbi, char *buf) 2393 { 2394 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2395 2396 if (!sb->s_bdev->bd_part) 2397 return snprintf(buf, PAGE_SIZE, "0\n"); 2398 return snprintf(buf, PAGE_SIZE, "%llu\n", 2399 (unsigned long long)(sbi->s_kbytes_written + 2400 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2401 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 2402 } 2403 2404 static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2405 struct ext4_sb_info *sbi, 2406 const char *buf, size_t count) 2407 { 2408 unsigned long t; 2409 2410 if (parse_strtoul(buf, 0x40000000, &t)) 2411 return -EINVAL; 2412 2413 if (!is_power_of_2(t)) 2414 return -EINVAL; 2415 2416 sbi->s_inode_readahead_blks = t; 2417 return count; 2418 } 2419 2420 static ssize_t sbi_ui_show(struct ext4_attr *a, 2421 struct ext4_sb_info *sbi, char *buf) 2422 { 2423 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2424 2425 return snprintf(buf, PAGE_SIZE, "%u\n", *ui); 2426 } 2427 2428 static ssize_t sbi_ui_store(struct ext4_attr *a, 2429 struct ext4_sb_info *sbi, 2430 const char *buf, size_t count) 2431 { 2432 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); 2433 unsigned long t; 2434 2435 if (parse_strtoul(buf, 0xffffffff, &t)) 2436 return -EINVAL; 2437 *ui = t; 2438 return count; 2439 } 2440 2441 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2442 static struct ext4_attr ext4_attr_##_name = { \ 2443 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2444 .show = _show, \ 2445 .store = _store, \ 2446 .offset = offsetof(struct ext4_sb_info, _elname), \ 2447 } 2448 #define EXT4_ATTR(name, mode, show, store) \ 2449 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2450 2451 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2452 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2453 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2454 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2455 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2456 #define ATTR_LIST(name) &ext4_attr_##name.attr 2457 2458 EXT4_RO_ATTR(delayed_allocation_blocks); 2459 EXT4_RO_ATTR(session_write_kbytes); 2460 EXT4_RO_ATTR(lifetime_write_kbytes); 2461 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2462 inode_readahead_blks_store, s_inode_readahead_blks); 2463 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2464 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2465 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2466 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2467 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2468 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2469 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2470 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2471 2472 static struct attribute *ext4_attrs[] = { 2473 ATTR_LIST(delayed_allocation_blocks), 2474 ATTR_LIST(session_write_kbytes), 2475 ATTR_LIST(lifetime_write_kbytes), 2476 ATTR_LIST(inode_readahead_blks), 2477 ATTR_LIST(inode_goal), 2478 ATTR_LIST(mb_stats), 2479 ATTR_LIST(mb_max_to_scan), 2480 ATTR_LIST(mb_min_to_scan), 2481 ATTR_LIST(mb_order2_req), 2482 ATTR_LIST(mb_stream_req), 2483 ATTR_LIST(mb_group_prealloc), 2484 ATTR_LIST(max_writeback_mb_bump), 2485 NULL, 2486 }; 2487 2488 /* Features this copy of ext4 supports */ 2489 EXT4_INFO_ATTR(lazy_itable_init); 2490 EXT4_INFO_ATTR(batched_discard); 2491 2492 static struct attribute *ext4_feat_attrs[] = { 2493 ATTR_LIST(lazy_itable_init), 2494 ATTR_LIST(batched_discard), 2495 NULL, 2496 }; 2497 2498 static ssize_t ext4_attr_show(struct kobject *kobj, 2499 struct attribute *attr, char *buf) 2500 { 2501 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2502 s_kobj); 2503 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2504 2505 return a->show ? a->show(a, sbi, buf) : 0; 2506 } 2507 2508 static ssize_t ext4_attr_store(struct kobject *kobj, 2509 struct attribute *attr, 2510 const char *buf, size_t len) 2511 { 2512 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2513 s_kobj); 2514 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2515 2516 return a->store ? a->store(a, sbi, buf, len) : 0; 2517 } 2518 2519 static void ext4_sb_release(struct kobject *kobj) 2520 { 2521 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2522 s_kobj); 2523 complete(&sbi->s_kobj_unregister); 2524 } 2525 2526 static const struct sysfs_ops ext4_attr_ops = { 2527 .show = ext4_attr_show, 2528 .store = ext4_attr_store, 2529 }; 2530 2531 static struct kobj_type ext4_ktype = { 2532 .default_attrs = ext4_attrs, 2533 .sysfs_ops = &ext4_attr_ops, 2534 .release = ext4_sb_release, 2535 }; 2536 2537 static void ext4_feat_release(struct kobject *kobj) 2538 { 2539 complete(&ext4_feat->f_kobj_unregister); 2540 } 2541 2542 static struct kobj_type ext4_feat_ktype = { 2543 .default_attrs = ext4_feat_attrs, 2544 .sysfs_ops = &ext4_attr_ops, 2545 .release = ext4_feat_release, 2546 }; 2547 2548 /* 2549 * Check whether this filesystem can be mounted based on 2550 * the features present and the RDONLY/RDWR mount requested. 2551 * Returns 1 if this filesystem can be mounted as requested, 2552 * 0 if it cannot be. 2553 */ 2554 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2555 { 2556 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2557 ext4_msg(sb, KERN_ERR, 2558 "Couldn't mount because of " 2559 "unsupported optional features (%x)", 2560 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2561 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2562 return 0; 2563 } 2564 2565 if (readonly) 2566 return 1; 2567 2568 /* Check that feature set is OK for a read-write mount */ 2569 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2570 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2571 "unsupported optional features (%x)", 2572 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2573 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2574 return 0; 2575 } 2576 /* 2577 * Large file size enabled file system can only be mounted 2578 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2579 */ 2580 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2581 if (sizeof(blkcnt_t) < sizeof(u64)) { 2582 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2583 "cannot be mounted RDWR without " 2584 "CONFIG_LBDAF"); 2585 return 0; 2586 } 2587 } 2588 return 1; 2589 } 2590 2591 /* 2592 * This function is called once a day if we have errors logged 2593 * on the file system 2594 */ 2595 static void print_daily_error_info(unsigned long arg) 2596 { 2597 struct super_block *sb = (struct super_block *) arg; 2598 struct ext4_sb_info *sbi; 2599 struct ext4_super_block *es; 2600 2601 sbi = EXT4_SB(sb); 2602 es = sbi->s_es; 2603 2604 if (es->s_error_count) 2605 ext4_msg(sb, KERN_NOTICE, "error count: %u", 2606 le32_to_cpu(es->s_error_count)); 2607 if (es->s_first_error_time) { 2608 printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", 2609 sb->s_id, le32_to_cpu(es->s_first_error_time), 2610 (int) sizeof(es->s_first_error_func), 2611 es->s_first_error_func, 2612 le32_to_cpu(es->s_first_error_line)); 2613 if (es->s_first_error_ino) 2614 printk(": inode %u", 2615 le32_to_cpu(es->s_first_error_ino)); 2616 if (es->s_first_error_block) 2617 printk(": block %llu", (unsigned long long) 2618 le64_to_cpu(es->s_first_error_block)); 2619 printk("\n"); 2620 } 2621 if (es->s_last_error_time) { 2622 printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", 2623 sb->s_id, le32_to_cpu(es->s_last_error_time), 2624 (int) sizeof(es->s_last_error_func), 2625 es->s_last_error_func, 2626 le32_to_cpu(es->s_last_error_line)); 2627 if (es->s_last_error_ino) 2628 printk(": inode %u", 2629 le32_to_cpu(es->s_last_error_ino)); 2630 if (es->s_last_error_block) 2631 printk(": block %llu", (unsigned long long) 2632 le64_to_cpu(es->s_last_error_block)); 2633 printk("\n"); 2634 } 2635 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ 2636 } 2637 2638 static void ext4_lazyinode_timeout(unsigned long data) 2639 { 2640 struct task_struct *p = (struct task_struct *)data; 2641 wake_up_process(p); 2642 } 2643 2644 /* Find next suitable group and run ext4_init_inode_table */ 2645 static int ext4_run_li_request(struct ext4_li_request *elr) 2646 { 2647 struct ext4_group_desc *gdp = NULL; 2648 ext4_group_t group, ngroups; 2649 struct super_block *sb; 2650 unsigned long timeout = 0; 2651 int ret = 0; 2652 2653 sb = elr->lr_super; 2654 ngroups = EXT4_SB(sb)->s_groups_count; 2655 2656 for (group = elr->lr_next_group; group < ngroups; group++) { 2657 gdp = ext4_get_group_desc(sb, group, NULL); 2658 if (!gdp) { 2659 ret = 1; 2660 break; 2661 } 2662 2663 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2664 break; 2665 } 2666 2667 if (group == ngroups) 2668 ret = 1; 2669 2670 if (!ret) { 2671 timeout = jiffies; 2672 ret = ext4_init_inode_table(sb, group, 2673 elr->lr_timeout ? 0 : 1); 2674 if (elr->lr_timeout == 0) { 2675 timeout = jiffies - timeout; 2676 if (elr->lr_sbi->s_li_wait_mult) 2677 timeout *= elr->lr_sbi->s_li_wait_mult; 2678 else 2679 timeout *= 20; 2680 elr->lr_timeout = timeout; 2681 } 2682 elr->lr_next_sched = jiffies + elr->lr_timeout; 2683 elr->lr_next_group = group + 1; 2684 } 2685 2686 return ret; 2687 } 2688 2689 /* 2690 * Remove lr_request from the list_request and free the 2691 * request tructure. Should be called with li_list_mtx held 2692 */ 2693 static void ext4_remove_li_request(struct ext4_li_request *elr) 2694 { 2695 struct ext4_sb_info *sbi; 2696 2697 if (!elr) 2698 return; 2699 2700 sbi = elr->lr_sbi; 2701 2702 list_del(&elr->lr_request); 2703 sbi->s_li_request = NULL; 2704 kfree(elr); 2705 } 2706 2707 static void ext4_unregister_li_request(struct super_block *sb) 2708 { 2709 struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; 2710 2711 if (!ext4_li_info) 2712 return; 2713 2714 mutex_lock(&ext4_li_info->li_list_mtx); 2715 ext4_remove_li_request(elr); 2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2717 } 2718 2719 /* 2720 * This is the function where ext4lazyinit thread lives. It walks 2721 * through the request list searching for next scheduled filesystem. 2722 * When such a fs is found, run the lazy initialization request 2723 * (ext4_rn_li_request) and keep track of the time spend in this 2724 * function. Based on that time we compute next schedule time of 2725 * the request. When walking through the list is complete, compute 2726 * next waking time and put itself into sleep. 2727 */ 2728 static int ext4_lazyinit_thread(void *arg) 2729 { 2730 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; 2731 struct list_head *pos, *n; 2732 struct ext4_li_request *elr; 2733 unsigned long next_wakeup; 2734 DEFINE_WAIT(wait); 2735 2736 BUG_ON(NULL == eli); 2737 2738 eli->li_timer.data = (unsigned long)current; 2739 eli->li_timer.function = ext4_lazyinode_timeout; 2740 2741 eli->li_task = current; 2742 wake_up(&eli->li_wait_task); 2743 2744 cont_thread: 2745 while (true) { 2746 next_wakeup = MAX_JIFFY_OFFSET; 2747 2748 mutex_lock(&eli->li_list_mtx); 2749 if (list_empty(&eli->li_request_list)) { 2750 mutex_unlock(&eli->li_list_mtx); 2751 goto exit_thread; 2752 } 2753 2754 list_for_each_safe(pos, n, &eli->li_request_list) { 2755 elr = list_entry(pos, struct ext4_li_request, 2756 lr_request); 2757 2758 if (time_after_eq(jiffies, elr->lr_next_sched)) { 2759 if (ext4_run_li_request(elr) != 0) { 2760 /* error, remove the lazy_init job */ 2761 ext4_remove_li_request(elr); 2762 continue; 2763 } 2764 } 2765 2766 if (time_before(elr->lr_next_sched, next_wakeup)) 2767 next_wakeup = elr->lr_next_sched; 2768 } 2769 mutex_unlock(&eli->li_list_mtx); 2770 2771 if (freezing(current)) 2772 refrigerator(); 2773 2774 if ((time_after_eq(jiffies, next_wakeup)) || 2775 (MAX_JIFFY_OFFSET == next_wakeup)) { 2776 cond_resched(); 2777 continue; 2778 } 2779 2780 eli->li_timer.expires = next_wakeup; 2781 add_timer(&eli->li_timer); 2782 prepare_to_wait(&eli->li_wait_daemon, &wait, 2783 TASK_INTERRUPTIBLE); 2784 if (time_before(jiffies, next_wakeup)) 2785 schedule(); 2786 finish_wait(&eli->li_wait_daemon, &wait); 2787 } 2788 2789 exit_thread: 2790 /* 2791 * It looks like the request list is empty, but we need 2792 * to check it under the li_list_mtx lock, to prevent any 2793 * additions into it, and of course we should lock ext4_li_mtx 2794 * to atomically free the list and ext4_li_info, because at 2795 * this point another ext4 filesystem could be registering 2796 * new one. 2797 */ 2798 mutex_lock(&ext4_li_mtx); 2799 mutex_lock(&eli->li_list_mtx); 2800 if (!list_empty(&eli->li_request_list)) { 2801 mutex_unlock(&eli->li_list_mtx); 2802 mutex_unlock(&ext4_li_mtx); 2803 goto cont_thread; 2804 } 2805 mutex_unlock(&eli->li_list_mtx); 2806 del_timer_sync(&ext4_li_info->li_timer); 2807 eli->li_task = NULL; 2808 wake_up(&eli->li_wait_task); 2809 2810 kfree(ext4_li_info); 2811 ext4_li_info = NULL; 2812 mutex_unlock(&ext4_li_mtx); 2813 2814 return 0; 2815 } 2816 2817 static void ext4_clear_request_list(void) 2818 { 2819 struct list_head *pos, *n; 2820 struct ext4_li_request *elr; 2821 2822 mutex_lock(&ext4_li_info->li_list_mtx); 2823 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { 2824 elr = list_entry(pos, struct ext4_li_request, 2825 lr_request); 2826 ext4_remove_li_request(elr); 2827 } 2828 mutex_unlock(&ext4_li_info->li_list_mtx); 2829 } 2830 2831 static int ext4_run_lazyinit_thread(void) 2832 { 2833 struct task_struct *t; 2834 2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2836 if (IS_ERR(t)) { 2837 int err = PTR_ERR(t); 2838 ext4_clear_request_list(); 2839 del_timer_sync(&ext4_li_info->li_timer); 2840 kfree(ext4_li_info); 2841 ext4_li_info = NULL; 2842 printk(KERN_CRIT "EXT4: error %d creating inode table " 2843 "initialization thread\n", 2844 err); 2845 return err; 2846 } 2847 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; 2848 2849 wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); 2850 return 0; 2851 } 2852 2853 /* 2854 * Check whether it make sense to run itable init. thread or not. 2855 * If there is at least one uninitialized inode table, return 2856 * corresponding group number, else the loop goes through all 2857 * groups and return total number of groups. 2858 */ 2859 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) 2860 { 2861 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; 2862 struct ext4_group_desc *gdp = NULL; 2863 2864 for (group = 0; group < ngroups; group++) { 2865 gdp = ext4_get_group_desc(sb, group, NULL); 2866 if (!gdp) 2867 continue; 2868 2869 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2870 break; 2871 } 2872 2873 return group; 2874 } 2875 2876 static int ext4_li_info_new(void) 2877 { 2878 struct ext4_lazy_init *eli = NULL; 2879 2880 eli = kzalloc(sizeof(*eli), GFP_KERNEL); 2881 if (!eli) 2882 return -ENOMEM; 2883 2884 eli->li_task = NULL; 2885 INIT_LIST_HEAD(&eli->li_request_list); 2886 mutex_init(&eli->li_list_mtx); 2887 2888 init_waitqueue_head(&eli->li_wait_daemon); 2889 init_waitqueue_head(&eli->li_wait_task); 2890 init_timer(&eli->li_timer); 2891 eli->li_state |= EXT4_LAZYINIT_QUIT; 2892 2893 ext4_li_info = eli; 2894 2895 return 0; 2896 } 2897 2898 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, 2899 ext4_group_t start) 2900 { 2901 struct ext4_sb_info *sbi = EXT4_SB(sb); 2902 struct ext4_li_request *elr; 2903 unsigned long rnd; 2904 2905 elr = kzalloc(sizeof(*elr), GFP_KERNEL); 2906 if (!elr) 2907 return NULL; 2908 2909 elr->lr_super = sb; 2910 elr->lr_sbi = sbi; 2911 elr->lr_next_group = start; 2912 2913 /* 2914 * Randomize first schedule time of the request to 2915 * spread the inode table initialization requests 2916 * better. 2917 */ 2918 get_random_bytes(&rnd, sizeof(rnd)); 2919 elr->lr_next_sched = jiffies + (unsigned long)rnd % 2920 (EXT4_DEF_LI_MAX_START_DELAY * HZ); 2921 2922 return elr; 2923 } 2924 2925 static int ext4_register_li_request(struct super_block *sb, 2926 ext4_group_t first_not_zeroed) 2927 { 2928 struct ext4_sb_info *sbi = EXT4_SB(sb); 2929 struct ext4_li_request *elr; 2930 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2931 int ret = 0; 2932 2933 if (sbi->s_li_request != NULL) 2934 return 0; 2935 2936 if (first_not_zeroed == ngroups || 2937 (sb->s_flags & MS_RDONLY) || 2938 !test_opt(sb, INIT_INODE_TABLE)) { 2939 sbi->s_li_request = NULL; 2940 return 0; 2941 } 2942 2943 if (first_not_zeroed == ngroups) { 2944 sbi->s_li_request = NULL; 2945 return 0; 2946 } 2947 2948 elr = ext4_li_request_new(sb, first_not_zeroed); 2949 if (!elr) 2950 return -ENOMEM; 2951 2952 mutex_lock(&ext4_li_mtx); 2953 2954 if (NULL == ext4_li_info) { 2955 ret = ext4_li_info_new(); 2956 if (ret) 2957 goto out; 2958 } 2959 2960 mutex_lock(&ext4_li_info->li_list_mtx); 2961 list_add(&elr->lr_request, &ext4_li_info->li_request_list); 2962 mutex_unlock(&ext4_li_info->li_list_mtx); 2963 2964 sbi->s_li_request = elr; 2965 2966 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 2967 ret = ext4_run_lazyinit_thread(); 2968 if (ret) 2969 goto out; 2970 } 2971 out: 2972 mutex_unlock(&ext4_li_mtx); 2973 if (ret) 2974 kfree(elr); 2975 return ret; 2976 } 2977 2978 /* 2979 * We do not need to lock anything since this is called on 2980 * module unload. 2981 */ 2982 static void ext4_destroy_lazyinit_thread(void) 2983 { 2984 /* 2985 * If thread exited earlier 2986 * there's nothing to be done. 2987 */ 2988 if (!ext4_li_info) 2989 return; 2990 2991 ext4_clear_request_list(); 2992 2993 while (ext4_li_info->li_task) { 2994 wake_up(&ext4_li_info->li_wait_daemon); 2995 wait_event(ext4_li_info->li_wait_task, 2996 ext4_li_info->li_task == NULL); 2997 } 2998 } 2999 3000 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3001 __releases(kernel_lock) 3002 __acquires(kernel_lock) 3003 { 3004 char *orig_data = kstrdup(data, GFP_KERNEL); 3005 struct buffer_head *bh; 3006 struct ext4_super_block *es = NULL; 3007 struct ext4_sb_info *sbi; 3008 ext4_fsblk_t block; 3009 ext4_fsblk_t sb_block = get_sb_block(&data); 3010 ext4_fsblk_t logical_sb_block; 3011 unsigned long offset = 0; 3012 unsigned long journal_devnum = 0; 3013 unsigned long def_mount_opts; 3014 struct inode *root; 3015 char *cp; 3016 const char *descr; 3017 int ret = -ENOMEM; 3018 int blocksize; 3019 unsigned int db_count; 3020 unsigned int i; 3021 int needs_recovery, has_huge_files; 3022 __u64 blocks_count; 3023 int err; 3024 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3025 ext4_group_t first_not_zeroed; 3026 3027 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 3028 if (!sbi) 3029 goto out_free_orig; 3030 3031 sbi->s_blockgroup_lock = 3032 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 3033 if (!sbi->s_blockgroup_lock) { 3034 kfree(sbi); 3035 goto out_free_orig; 3036 } 3037 sb->s_fs_info = sbi; 3038 sbi->s_mount_opt = 0; 3039 sbi->s_resuid = EXT4_DEF_RESUID; 3040 sbi->s_resgid = EXT4_DEF_RESGID; 3041 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 3042 sbi->s_sb_block = sb_block; 3043 if (sb->s_bdev->bd_part) 3044 sbi->s_sectors_written_start = 3045 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3046 3047 /* Cleanup superblock name */ 3048 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3049 *cp = '!'; 3050 3051 ret = -EINVAL; 3052 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 3053 if (!blocksize) { 3054 ext4_msg(sb, KERN_ERR, "unable to set blocksize"); 3055 goto out_fail; 3056 } 3057 3058 /* 3059 * The ext4 superblock will not be buffer aligned for other than 1kB 3060 * block sizes. We need to calculate the offset from buffer start. 3061 */ 3062 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 3063 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3064 offset = do_div(logical_sb_block, blocksize); 3065 } else { 3066 logical_sb_block = sb_block; 3067 } 3068 3069 if (!(bh = sb_bread(sb, logical_sb_block))) { 3070 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3071 goto out_fail; 3072 } 3073 /* 3074 * Note: s_es must be initialized as soon as possible because 3075 * some ext4 macro-instructions depend on its value 3076 */ 3077 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3078 sbi->s_es = es; 3079 sb->s_magic = le16_to_cpu(es->s_magic); 3080 if (sb->s_magic != EXT4_SUPER_MAGIC) 3081 goto cantfind_ext4; 3082 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3083 3084 /* Set defaults before we parse the mount options */ 3085 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3086 set_opt(sb, INIT_INODE_TABLE); 3087 if (def_mount_opts & EXT4_DEFM_DEBUG) 3088 set_opt(sb, DEBUG); 3089 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { 3090 ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", 3091 "2.6.38"); 3092 set_opt(sb, GRPID); 3093 } 3094 if (def_mount_opts & EXT4_DEFM_UID16) 3095 set_opt(sb, NO_UID32); 3096 #ifdef CONFIG_EXT4_FS_XATTR 3097 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 3098 set_opt(sb, XATTR_USER); 3099 #endif 3100 #ifdef CONFIG_EXT4_FS_POSIX_ACL 3101 if (def_mount_opts & EXT4_DEFM_ACL) 3102 set_opt(sb, POSIX_ACL); 3103 #endif 3104 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3105 set_opt(sb, JOURNAL_DATA); 3106 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3107 set_opt(sb, ORDERED_DATA); 3108 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3109 set_opt(sb, WRITEBACK_DATA); 3110 3111 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3112 set_opt(sb, ERRORS_PANIC); 3113 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3114 set_opt(sb, ERRORS_CONT); 3115 else 3116 set_opt(sb, ERRORS_RO); 3117 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3118 set_opt(sb, BLOCK_VALIDITY); 3119 if (def_mount_opts & EXT4_DEFM_DISCARD) 3120 set_opt(sb, DISCARD); 3121 3122 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 3123 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 3124 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 3125 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 3126 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3127 3128 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3129 set_opt(sb, BARRIER); 3130 3131 /* 3132 * enable delayed allocation by default 3133 * Use -o nodelalloc to turn it off 3134 */ 3135 if (!IS_EXT3_SB(sb) && 3136 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3137 set_opt(sb, DELALLOC); 3138 3139 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3140 &journal_devnum, &journal_ioprio, NULL, 0)) { 3141 ext4_msg(sb, KERN_WARNING, 3142 "failed to parse options in superblock: %s", 3143 sbi->s_es->s_mount_opts); 3144 } 3145 if (!parse_options((char *) data, sb, &journal_devnum, 3146 &journal_ioprio, NULL, 0)) 3147 goto failed_mount; 3148 3149 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3150 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3151 3152 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 3153 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 3154 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 3155 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 3156 ext4_msg(sb, KERN_WARNING, 3157 "feature flags set on rev 0 fs, " 3158 "running e2fsck is recommended"); 3159 3160 /* 3161 * Check feature flags regardless of the revision level, since we 3162 * previously didn't change the revision level when setting the flags, 3163 * so there is a chance incompat flags are set on a rev 0 filesystem. 3164 */ 3165 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3166 goto failed_mount; 3167 3168 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 3169 3170 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3171 blocksize > EXT4_MAX_BLOCK_SIZE) { 3172 ext4_msg(sb, KERN_ERR, 3173 "Unsupported filesystem blocksize %d", blocksize); 3174 goto failed_mount; 3175 } 3176 3177 if (sb->s_blocksize != blocksize) { 3178 /* Validate the filesystem blocksize */ 3179 if (!sb_set_blocksize(sb, blocksize)) { 3180 ext4_msg(sb, KERN_ERR, "bad block size %d", 3181 blocksize); 3182 goto failed_mount; 3183 } 3184 3185 brelse(bh); 3186 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3187 offset = do_div(logical_sb_block, blocksize); 3188 bh = sb_bread(sb, logical_sb_block); 3189 if (!bh) { 3190 ext4_msg(sb, KERN_ERR, 3191 "Can't read superblock on 2nd try"); 3192 goto failed_mount; 3193 } 3194 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 3195 sbi->s_es = es; 3196 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3197 ext4_msg(sb, KERN_ERR, 3198 "Magic mismatch, very weird!"); 3199 goto failed_mount; 3200 } 3201 } 3202 3203 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3204 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 3205 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 3206 has_huge_files); 3207 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 3208 3209 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 3210 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; 3211 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 3212 } else { 3213 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 3214 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 3215 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 3216 (!is_power_of_2(sbi->s_inode_size)) || 3217 (sbi->s_inode_size > blocksize)) { 3218 ext4_msg(sb, KERN_ERR, 3219 "unsupported inode size: %d", 3220 sbi->s_inode_size); 3221 goto failed_mount; 3222 } 3223 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 3224 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 3225 } 3226 3227 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 3228 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 3229 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 3230 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 3231 !is_power_of_2(sbi->s_desc_size)) { 3232 ext4_msg(sb, KERN_ERR, 3233 "unsupported descriptor size %lu", 3234 sbi->s_desc_size); 3235 goto failed_mount; 3236 } 3237 } else 3238 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 3239 3240 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 3241 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 3242 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 3243 goto cantfind_ext4; 3244 3245 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 3246 if (sbi->s_inodes_per_block == 0) 3247 goto cantfind_ext4; 3248 sbi->s_itb_per_group = sbi->s_inodes_per_group / 3249 sbi->s_inodes_per_block; 3250 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 3251 sbi->s_sbh = bh; 3252 sbi->s_mount_state = le16_to_cpu(es->s_state); 3253 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3254 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3255 3256 for (i = 0; i < 4; i++) 3257 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3258 sbi->s_def_hash_version = es->s_def_hash_version; 3259 i = le32_to_cpu(es->s_flags); 3260 if (i & EXT2_FLAGS_UNSIGNED_HASH) 3261 sbi->s_hash_unsigned = 3; 3262 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 3263 #ifdef __CHAR_UNSIGNED__ 3264 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 3265 sbi->s_hash_unsigned = 3; 3266 #else 3267 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3268 #endif 3269 sb->s_dirt = 1; 3270 } 3271 3272 if (sbi->s_blocks_per_group > blocksize * 8) { 3273 ext4_msg(sb, KERN_ERR, 3274 "#blocks per group too big: %lu", 3275 sbi->s_blocks_per_group); 3276 goto failed_mount; 3277 } 3278 if (sbi->s_inodes_per_group > blocksize * 8) { 3279 ext4_msg(sb, KERN_ERR, 3280 "#inodes per group too big: %lu", 3281 sbi->s_inodes_per_group); 3282 goto failed_mount; 3283 } 3284 3285 /* 3286 * Test whether we have more sectors than will fit in sector_t, 3287 * and whether the max offset is addressable by the page cache. 3288 */ 3289 err = generic_check_addressable(sb->s_blocksize_bits, 3290 ext4_blocks_count(es)); 3291 if (err) { 3292 ext4_msg(sb, KERN_ERR, "filesystem" 3293 " too large to mount safely on this system"); 3294 if (sizeof(sector_t) < 8) 3295 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3296 ret = err; 3297 goto failed_mount; 3298 } 3299 3300 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 3301 goto cantfind_ext4; 3302 3303 /* check blocks count against device size */ 3304 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 3305 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 3306 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 3307 "exceeds size of device (%llu blocks)", 3308 ext4_blocks_count(es), blocks_count); 3309 goto failed_mount; 3310 } 3311 3312 /* 3313 * It makes no sense for the first data block to be beyond the end 3314 * of the filesystem. 3315 */ 3316 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3317 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3318 "block %u is beyond end of filesystem (%llu)", 3319 le32_to_cpu(es->s_first_data_block), 3320 ext4_blocks_count(es)); 3321 goto failed_mount; 3322 } 3323 blocks_count = (ext4_blocks_count(es) - 3324 le32_to_cpu(es->s_first_data_block) + 3325 EXT4_BLOCKS_PER_GROUP(sb) - 1); 3326 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 3327 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 3328 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 3329 "(block count %llu, first data block %u, " 3330 "blocks per group %lu)", sbi->s_groups_count, 3331 ext4_blocks_count(es), 3332 le32_to_cpu(es->s_first_data_block), 3333 EXT4_BLOCKS_PER_GROUP(sb)); 3334 goto failed_mount; 3335 } 3336 sbi->s_groups_count = blocks_count; 3337 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 3338 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3339 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3340 EXT4_DESC_PER_BLOCK(sb); 3341 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 3342 GFP_KERNEL); 3343 if (sbi->s_group_desc == NULL) { 3344 ext4_msg(sb, KERN_ERR, "not enough memory"); 3345 goto failed_mount; 3346 } 3347 3348 #ifdef CONFIG_PROC_FS 3349 if (ext4_proc_root) 3350 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3351 #endif 3352 3353 bgl_lock_init(sbi->s_blockgroup_lock); 3354 3355 for (i = 0; i < db_count; i++) { 3356 block = descriptor_loc(sb, logical_sb_block, i); 3357 sbi->s_group_desc[i] = sb_bread(sb, block); 3358 if (!sbi->s_group_desc[i]) { 3359 ext4_msg(sb, KERN_ERR, 3360 "can't read group descriptor %d", i); 3361 db_count = i; 3362 goto failed_mount2; 3363 } 3364 } 3365 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3366 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3367 goto failed_mount2; 3368 } 3369 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3370 if (!ext4_fill_flex_info(sb)) { 3371 ext4_msg(sb, KERN_ERR, 3372 "unable to initialize " 3373 "flex_bg meta info!"); 3374 goto failed_mount2; 3375 } 3376 3377 sbi->s_gdb_count = db_count; 3378 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3379 spin_lock_init(&sbi->s_next_gen_lock); 3380 3381 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3382 ext4_count_free_blocks(sb)); 3383 if (!err) { 3384 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3385 ext4_count_free_inodes(sb)); 3386 } 3387 if (!err) { 3388 err = percpu_counter_init(&sbi->s_dirs_counter, 3389 ext4_count_dirs(sb)); 3390 } 3391 if (!err) { 3392 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3393 } 3394 if (err) { 3395 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3396 goto failed_mount3; 3397 } 3398 3399 sbi->s_stripe = ext4_get_stripe_size(sbi); 3400 sbi->s_max_writeback_mb_bump = 128; 3401 3402 /* 3403 * set up enough so that it can read an inode 3404 */ 3405 if (!test_opt(sb, NOLOAD) && 3406 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) 3407 sb->s_op = &ext4_sops; 3408 else 3409 sb->s_op = &ext4_nojournal_sops; 3410 sb->s_export_op = &ext4_export_ops; 3411 sb->s_xattr = ext4_xattr_handlers; 3412 #ifdef CONFIG_QUOTA 3413 sb->s_qcop = &ext4_qctl_operations; 3414 sb->dq_op = &ext4_quota_operations; 3415 #endif 3416 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3417 mutex_init(&sbi->s_orphan_lock); 3418 mutex_init(&sbi->s_resize_lock); 3419 3420 sb->s_root = NULL; 3421 3422 needs_recovery = (es->s_last_orphan != 0 || 3423 EXT4_HAS_INCOMPAT_FEATURE(sb, 3424 EXT4_FEATURE_INCOMPAT_RECOVER)); 3425 3426 /* 3427 * The first inode we look at is the journal inode. Don't try 3428 * root first: it may be modified in the journal! 3429 */ 3430 if (!test_opt(sb, NOLOAD) && 3431 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3432 if (ext4_load_journal(sb, es, journal_devnum)) 3433 goto failed_mount3; 3434 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3435 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3436 ext4_msg(sb, KERN_ERR, "required journal recovery " 3437 "suppressed and not mounted read-only"); 3438 goto failed_mount_wq; 3439 } else { 3440 clear_opt(sb, DATA_FLAGS); 3441 set_opt(sb, WRITEBACK_DATA); 3442 sbi->s_journal = NULL; 3443 needs_recovery = 0; 3444 goto no_journal; 3445 } 3446 3447 if (ext4_blocks_count(es) > 0xffffffffULL && 3448 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 3449 JBD2_FEATURE_INCOMPAT_64BIT)) { 3450 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 3451 goto failed_mount_wq; 3452 } 3453 3454 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3455 jbd2_journal_set_features(sbi->s_journal, 3456 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3457 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3458 } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 3459 jbd2_journal_set_features(sbi->s_journal, 3460 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 3461 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3462 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3463 } else { 3464 jbd2_journal_clear_features(sbi->s_journal, 3465 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3466 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3467 } 3468 3469 /* We have now updated the journal if required, so we can 3470 * validate the data journaling mode. */ 3471 switch (test_opt(sb, DATA_FLAGS)) { 3472 case 0: 3473 /* No mode set, assume a default based on the journal 3474 * capabilities: ORDERED_DATA if the journal can 3475 * cope, else JOURNAL_DATA 3476 */ 3477 if (jbd2_journal_check_available_features 3478 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 3479 set_opt(sb, ORDERED_DATA); 3480 else 3481 set_opt(sb, JOURNAL_DATA); 3482 break; 3483 3484 case EXT4_MOUNT_ORDERED_DATA: 3485 case EXT4_MOUNT_WRITEBACK_DATA: 3486 if (!jbd2_journal_check_available_features 3487 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 3488 ext4_msg(sb, KERN_ERR, "Journal does not support " 3489 "requested data journaling mode"); 3490 goto failed_mount_wq; 3491 } 3492 default: 3493 break; 3494 } 3495 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3496 3497 /* 3498 * The journal may have updated the bg summary counts, so we 3499 * need to update the global counters. 3500 */ 3501 percpu_counter_set(&sbi->s_freeblocks_counter, 3502 ext4_count_free_blocks(sb)); 3503 percpu_counter_set(&sbi->s_freeinodes_counter, 3504 ext4_count_free_inodes(sb)); 3505 percpu_counter_set(&sbi->s_dirs_counter, 3506 ext4_count_dirs(sb)); 3507 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3508 3509 no_journal: 3510 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3511 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3512 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3513 goto failed_mount_wq; 3514 } 3515 3516 /* 3517 * The jbd2_journal_load will have done any necessary log recovery, 3518 * so we can safely mount the rest of the filesystem now. 3519 */ 3520 3521 root = ext4_iget(sb, EXT4_ROOT_INO); 3522 if (IS_ERR(root)) { 3523 ext4_msg(sb, KERN_ERR, "get root inode failed"); 3524 ret = PTR_ERR(root); 3525 goto failed_mount4; 3526 } 3527 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 3528 iput(root); 3529 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 3530 goto failed_mount4; 3531 } 3532 sb->s_root = d_alloc_root(root); 3533 if (!sb->s_root) { 3534 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3535 iput(root); 3536 ret = -ENOMEM; 3537 goto failed_mount4; 3538 } 3539 3540 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 3541 3542 /* determine the minimum size of new large inodes, if present */ 3543 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 3544 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 3545 EXT4_GOOD_OLD_INODE_SIZE; 3546 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3547 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { 3548 if (sbi->s_want_extra_isize < 3549 le16_to_cpu(es->s_want_extra_isize)) 3550 sbi->s_want_extra_isize = 3551 le16_to_cpu(es->s_want_extra_isize); 3552 if (sbi->s_want_extra_isize < 3553 le16_to_cpu(es->s_min_extra_isize)) 3554 sbi->s_want_extra_isize = 3555 le16_to_cpu(es->s_min_extra_isize); 3556 } 3557 } 3558 /* Check if enough inode space is available */ 3559 if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > 3560 sbi->s_inode_size) { 3561 sbi->s_want_extra_isize = sizeof(struct ext4_inode) - 3562 EXT4_GOOD_OLD_INODE_SIZE; 3563 ext4_msg(sb, KERN_INFO, "required extra inode space not" 3564 "available"); 3565 } 3566 3567 if (test_opt(sb, DELALLOC) && 3568 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { 3569 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " 3570 "requested data journaling mode"); 3571 clear_opt(sb, DELALLOC); 3572 } 3573 if (test_opt(sb, DIOREAD_NOLOCK)) { 3574 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3575 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3576 "option - requested data journaling mode"); 3577 clear_opt(sb, DIOREAD_NOLOCK); 3578 } 3579 if (sb->s_blocksize < PAGE_SIZE) { 3580 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " 3581 "option - block size is too small"); 3582 clear_opt(sb, DIOREAD_NOLOCK); 3583 } 3584 } 3585 3586 err = ext4_setup_system_zone(sb); 3587 if (err) { 3588 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3589 "zone (%d)", err); 3590 goto failed_mount4; 3591 } 3592 3593 ext4_ext_init(sb); 3594 err = ext4_mb_init(sb, needs_recovery); 3595 if (err) { 3596 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3597 err); 3598 goto failed_mount4; 3599 } 3600 3601 err = ext4_register_li_request(sb, first_not_zeroed); 3602 if (err) 3603 goto failed_mount4; 3604 3605 sbi->s_kobj.kset = ext4_kset; 3606 init_completion(&sbi->s_kobj_unregister); 3607 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3608 "%s", sb->s_id); 3609 if (err) { 3610 ext4_mb_release(sb); 3611 ext4_ext_release(sb); 3612 goto failed_mount4; 3613 }; 3614 3615 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3616 ext4_orphan_cleanup(sb, es); 3617 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 3618 if (needs_recovery) { 3619 ext4_msg(sb, KERN_INFO, "recovery complete"); 3620 ext4_mark_recovery_complete(sb, es); 3621 } 3622 if (EXT4_SB(sb)->s_journal) { 3623 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 3624 descr = " journalled data mode"; 3625 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 3626 descr = " ordered data mode"; 3627 else 3628 descr = " writeback data mode"; 3629 } else 3630 descr = "out journal"; 3631 3632 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 3633 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3634 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3635 3636 init_timer(&sbi->s_err_report); 3637 sbi->s_err_report.function = print_daily_error_info; 3638 sbi->s_err_report.data = (unsigned long) sb; 3639 if (es->s_error_count) 3640 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3641 3642 kfree(orig_data); 3643 return 0; 3644 3645 cantfind_ext4: 3646 if (!silent) 3647 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3648 goto failed_mount; 3649 3650 failed_mount4: 3651 ext4_msg(sb, KERN_ERR, "mount failed"); 3652 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3653 failed_mount_wq: 3654 ext4_release_system_zone(sb); 3655 if (sbi->s_journal) { 3656 jbd2_journal_destroy(sbi->s_journal); 3657 sbi->s_journal = NULL; 3658 } 3659 failed_mount3: 3660 if (sbi->s_flex_groups) { 3661 if (is_vmalloc_addr(sbi->s_flex_groups)) 3662 vfree(sbi->s_flex_groups); 3663 else 3664 kfree(sbi->s_flex_groups); 3665 } 3666 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3667 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3668 percpu_counter_destroy(&sbi->s_dirs_counter); 3669 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3670 failed_mount2: 3671 for (i = 0; i < db_count; i++) 3672 brelse(sbi->s_group_desc[i]); 3673 kfree(sbi->s_group_desc); 3674 failed_mount: 3675 if (sbi->s_proc) { 3676 remove_proc_entry(sb->s_id, ext4_proc_root); 3677 } 3678 #ifdef CONFIG_QUOTA 3679 for (i = 0; i < MAXQUOTAS; i++) 3680 kfree(sbi->s_qf_names[i]); 3681 #endif 3682 ext4_blkdev_remove(sbi); 3683 brelse(bh); 3684 out_fail: 3685 sb->s_fs_info = NULL; 3686 kfree(sbi->s_blockgroup_lock); 3687 kfree(sbi); 3688 out_free_orig: 3689 kfree(orig_data); 3690 return ret; 3691 } 3692 3693 /* 3694 * Setup any per-fs journal parameters now. We'll do this both on 3695 * initial mount, once the journal has been initialised but before we've 3696 * done any recovery; and again on any subsequent remount. 3697 */ 3698 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 3699 { 3700 struct ext4_sb_info *sbi = EXT4_SB(sb); 3701 3702 journal->j_commit_interval = sbi->s_commit_interval; 3703 journal->j_min_batch_time = sbi->s_min_batch_time; 3704 journal->j_max_batch_time = sbi->s_max_batch_time; 3705 3706 write_lock(&journal->j_state_lock); 3707 if (test_opt(sb, BARRIER)) 3708 journal->j_flags |= JBD2_BARRIER; 3709 else 3710 journal->j_flags &= ~JBD2_BARRIER; 3711 if (test_opt(sb, DATA_ERR_ABORT)) 3712 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 3713 else 3714 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 3715 write_unlock(&journal->j_state_lock); 3716 } 3717 3718 static journal_t *ext4_get_journal(struct super_block *sb, 3719 unsigned int journal_inum) 3720 { 3721 struct inode *journal_inode; 3722 journal_t *journal; 3723 3724 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3725 3726 /* First, test for the existence of a valid inode on disk. Bad 3727 * things happen if we iget() an unused inode, as the subsequent 3728 * iput() will try to delete it. */ 3729 3730 journal_inode = ext4_iget(sb, journal_inum); 3731 if (IS_ERR(journal_inode)) { 3732 ext4_msg(sb, KERN_ERR, "no journal found"); 3733 return NULL; 3734 } 3735 if (!journal_inode->i_nlink) { 3736 make_bad_inode(journal_inode); 3737 iput(journal_inode); 3738 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 3739 return NULL; 3740 } 3741 3742 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 3743 journal_inode, journal_inode->i_size); 3744 if (!S_ISREG(journal_inode->i_mode)) { 3745 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 3746 iput(journal_inode); 3747 return NULL; 3748 } 3749 3750 journal = jbd2_journal_init_inode(journal_inode); 3751 if (!journal) { 3752 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 3753 iput(journal_inode); 3754 return NULL; 3755 } 3756 journal->j_private = sb; 3757 ext4_init_journal_params(sb, journal); 3758 return journal; 3759 } 3760 3761 static journal_t *ext4_get_dev_journal(struct super_block *sb, 3762 dev_t j_dev) 3763 { 3764 struct buffer_head *bh; 3765 journal_t *journal; 3766 ext4_fsblk_t start; 3767 ext4_fsblk_t len; 3768 int hblock, blocksize; 3769 ext4_fsblk_t sb_block; 3770 unsigned long offset; 3771 struct ext4_super_block *es; 3772 struct block_device *bdev; 3773 3774 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3775 3776 bdev = ext4_blkdev_get(j_dev, sb); 3777 if (bdev == NULL) 3778 return NULL; 3779 3780 blocksize = sb->s_blocksize; 3781 hblock = bdev_logical_block_size(bdev); 3782 if (blocksize < hblock) { 3783 ext4_msg(sb, KERN_ERR, 3784 "blocksize too small for journal device"); 3785 goto out_bdev; 3786 } 3787 3788 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 3789 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 3790 set_blocksize(bdev, blocksize); 3791 if (!(bh = __bread(bdev, sb_block, blocksize))) { 3792 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 3793 "external journal"); 3794 goto out_bdev; 3795 } 3796 3797 es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3798 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 3799 !(le32_to_cpu(es->s_feature_incompat) & 3800 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 3801 ext4_msg(sb, KERN_ERR, "external journal has " 3802 "bad superblock"); 3803 brelse(bh); 3804 goto out_bdev; 3805 } 3806 3807 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 3808 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 3809 brelse(bh); 3810 goto out_bdev; 3811 } 3812 3813 len = ext4_blocks_count(es); 3814 start = sb_block + 1; 3815 brelse(bh); /* we're done with the superblock */ 3816 3817 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 3818 start, len, blocksize); 3819 if (!journal) { 3820 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 3821 goto out_bdev; 3822 } 3823 journal->j_private = sb; 3824 ll_rw_block(READ, 1, &journal->j_sb_buffer); 3825 wait_on_buffer(journal->j_sb_buffer); 3826 if (!buffer_uptodate(journal->j_sb_buffer)) { 3827 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 3828 goto out_journal; 3829 } 3830 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 3831 ext4_msg(sb, KERN_ERR, "External journal has more than one " 3832 "user (unsupported) - %d", 3833 be32_to_cpu(journal->j_superblock->s_nr_users)); 3834 goto out_journal; 3835 } 3836 EXT4_SB(sb)->journal_bdev = bdev; 3837 ext4_init_journal_params(sb, journal); 3838 return journal; 3839 3840 out_journal: 3841 jbd2_journal_destroy(journal); 3842 out_bdev: 3843 ext4_blkdev_put(bdev); 3844 return NULL; 3845 } 3846 3847 static int ext4_load_journal(struct super_block *sb, 3848 struct ext4_super_block *es, 3849 unsigned long journal_devnum) 3850 { 3851 journal_t *journal; 3852 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 3853 dev_t journal_dev; 3854 int err = 0; 3855 int really_read_only; 3856 3857 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 3858 3859 if (journal_devnum && 3860 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3861 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 3862 "numbers have changed"); 3863 journal_dev = new_decode_dev(journal_devnum); 3864 } else 3865 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 3866 3867 really_read_only = bdev_read_only(sb->s_bdev); 3868 3869 /* 3870 * Are we loading a blank journal or performing recovery after a 3871 * crash? For recovery, we need to check in advance whether we 3872 * can get read-write access to the device. 3873 */ 3874 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3875 if (sb->s_flags & MS_RDONLY) { 3876 ext4_msg(sb, KERN_INFO, "INFO: recovery " 3877 "required on readonly filesystem"); 3878 if (really_read_only) { 3879 ext4_msg(sb, KERN_ERR, "write access " 3880 "unavailable, cannot proceed"); 3881 return -EROFS; 3882 } 3883 ext4_msg(sb, KERN_INFO, "write access will " 3884 "be enabled during recovery"); 3885 } 3886 } 3887 3888 if (journal_inum && journal_dev) { 3889 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 3890 "and inode journals!"); 3891 return -EINVAL; 3892 } 3893 3894 if (journal_inum) { 3895 if (!(journal = ext4_get_journal(sb, journal_inum))) 3896 return -EINVAL; 3897 } else { 3898 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 3899 return -EINVAL; 3900 } 3901 3902 if (!(journal->j_flags & JBD2_BARRIER)) 3903 ext4_msg(sb, KERN_INFO, "barriers disabled"); 3904 3905 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 3906 err = jbd2_journal_update_format(journal); 3907 if (err) { 3908 ext4_msg(sb, KERN_ERR, "error updating journal"); 3909 jbd2_journal_destroy(journal); 3910 return err; 3911 } 3912 } 3913 3914 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 3915 err = jbd2_journal_wipe(journal, !really_read_only); 3916 if (!err) { 3917 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); 3918 if (save) 3919 memcpy(save, ((char *) es) + 3920 EXT4_S_ERR_START, EXT4_S_ERR_LEN); 3921 err = jbd2_journal_load(journal); 3922 if (save) 3923 memcpy(((char *) es) + EXT4_S_ERR_START, 3924 save, EXT4_S_ERR_LEN); 3925 kfree(save); 3926 } 3927 3928 if (err) { 3929 ext4_msg(sb, KERN_ERR, "error loading journal"); 3930 jbd2_journal_destroy(journal); 3931 return err; 3932 } 3933 3934 EXT4_SB(sb)->s_journal = journal; 3935 ext4_clear_journal_err(sb, es); 3936 3937 if (!really_read_only && journal_devnum && 3938 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 3939 es->s_journal_dev = cpu_to_le32(journal_devnum); 3940 3941 /* Make sure we flush the recovery flag to disk. */ 3942 ext4_commit_super(sb, 1); 3943 } 3944 3945 return 0; 3946 } 3947 3948 static int ext4_commit_super(struct super_block *sb, int sync) 3949 { 3950 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 3951 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 3952 int error = 0; 3953 3954 if (!sbh) 3955 return error; 3956 if (buffer_write_io_error(sbh)) { 3957 /* 3958 * Oh, dear. A previous attempt to write the 3959 * superblock failed. This could happen because the 3960 * USB device was yanked out. Or it could happen to 3961 * be a transient write error and maybe the block will 3962 * be remapped. Nothing we can do but to retry the 3963 * write and hope for the best. 3964 */ 3965 ext4_msg(sb, KERN_ERR, "previous I/O error to " 3966 "superblock detected"); 3967 clear_buffer_write_io_error(sbh); 3968 set_buffer_uptodate(sbh); 3969 } 3970 /* 3971 * If the file system is mounted read-only, don't update the 3972 * superblock write time. This avoids updating the superblock 3973 * write time when we are mounting the root file system 3974 * read/only but we need to replay the journal; at that point, 3975 * for people who are east of GMT and who make their clock 3976 * tick in localtime for Windows bug-for-bug compatibility, 3977 * the clock is set in the future, and this will cause e2fsck 3978 * to complain and force a full file system check. 3979 */ 3980 if (!(sb->s_flags & MS_RDONLY)) 3981 es->s_wtime = cpu_to_le32(get_seconds()); 3982 if (sb->s_bdev->bd_part) 3983 es->s_kbytes_written = 3984 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 3985 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 3986 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 3987 else 3988 es->s_kbytes_written = 3989 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3990 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3991 &EXT4_SB(sb)->s_freeblocks_counter)); 3992 es->s_free_inodes_count = 3993 cpu_to_le32(percpu_counter_sum_positive( 3994 &EXT4_SB(sb)->s_freeinodes_counter)); 3995 sb->s_dirt = 0; 3996 BUFFER_TRACE(sbh, "marking dirty"); 3997 mark_buffer_dirty(sbh); 3998 if (sync) { 3999 error = sync_dirty_buffer(sbh); 4000 if (error) 4001 return error; 4002 4003 error = buffer_write_io_error(sbh); 4004 if (error) { 4005 ext4_msg(sb, KERN_ERR, "I/O error while writing " 4006 "superblock"); 4007 clear_buffer_write_io_error(sbh); 4008 set_buffer_uptodate(sbh); 4009 } 4010 } 4011 return error; 4012 } 4013 4014 /* 4015 * Have we just finished recovery? If so, and if we are mounting (or 4016 * remounting) the filesystem readonly, then we will end up with a 4017 * consistent fs on disk. Record that fact. 4018 */ 4019 static void ext4_mark_recovery_complete(struct super_block *sb, 4020 struct ext4_super_block *es) 4021 { 4022 journal_t *journal = EXT4_SB(sb)->s_journal; 4023 4024 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 4025 BUG_ON(journal != NULL); 4026 return; 4027 } 4028 jbd2_journal_lock_updates(journal); 4029 if (jbd2_journal_flush(journal) < 0) 4030 goto out; 4031 4032 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 4033 sb->s_flags & MS_RDONLY) { 4034 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4035 ext4_commit_super(sb, 1); 4036 } 4037 4038 out: 4039 jbd2_journal_unlock_updates(journal); 4040 } 4041 4042 /* 4043 * If we are mounting (or read-write remounting) a filesystem whose journal 4044 * has recorded an error from a previous lifetime, move that error to the 4045 * main filesystem now. 4046 */ 4047 static void ext4_clear_journal_err(struct super_block *sb, 4048 struct ext4_super_block *es) 4049 { 4050 journal_t *journal; 4051 int j_errno; 4052 const char *errstr; 4053 4054 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4055 4056 journal = EXT4_SB(sb)->s_journal; 4057 4058 /* 4059 * Now check for any error status which may have been recorded in the 4060 * journal by a prior ext4_error() or ext4_abort() 4061 */ 4062 4063 j_errno = jbd2_journal_errno(journal); 4064 if (j_errno) { 4065 char nbuf[16]; 4066 4067 errstr = ext4_decode_error(sb, j_errno, nbuf); 4068 ext4_warning(sb, "Filesystem error recorded " 4069 "from previous mount: %s", errstr); 4070 ext4_warning(sb, "Marking fs in need of filesystem check."); 4071 4072 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 4073 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 4074 ext4_commit_super(sb, 1); 4075 4076 jbd2_journal_clear_err(journal); 4077 } 4078 } 4079 4080 /* 4081 * Force the running and committing transactions to commit, 4082 * and wait on the commit. 4083 */ 4084 int ext4_force_commit(struct super_block *sb) 4085 { 4086 journal_t *journal; 4087 int ret = 0; 4088 4089 if (sb->s_flags & MS_RDONLY) 4090 return 0; 4091 4092 journal = EXT4_SB(sb)->s_journal; 4093 if (journal) { 4094 vfs_check_frozen(sb, SB_FREEZE_TRANS); 4095 ret = ext4_journal_force_commit(journal); 4096 } 4097 4098 return ret; 4099 } 4100 4101 static void ext4_write_super(struct super_block *sb) 4102 { 4103 lock_super(sb); 4104 ext4_commit_super(sb, 1); 4105 unlock_super(sb); 4106 } 4107 4108 static int ext4_sync_fs(struct super_block *sb, int wait) 4109 { 4110 int ret = 0; 4111 tid_t target; 4112 struct ext4_sb_info *sbi = EXT4_SB(sb); 4113 4114 trace_ext4_sync_fs(sb, wait); 4115 flush_workqueue(sbi->dio_unwritten_wq); 4116 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 4117 if (wait) 4118 jbd2_log_wait_commit(sbi->s_journal, target); 4119 } 4120 return ret; 4121 } 4122 4123 /* 4124 * LVM calls this function before a (read-only) snapshot is created. This 4125 * gives us a chance to flush the journal completely and mark the fs clean. 4126 */ 4127 static int ext4_freeze(struct super_block *sb) 4128 { 4129 int error = 0; 4130 journal_t *journal; 4131 4132 if (sb->s_flags & MS_RDONLY) 4133 return 0; 4134 4135 journal = EXT4_SB(sb)->s_journal; 4136 4137 /* Now we set up the journal barrier. */ 4138 jbd2_journal_lock_updates(journal); 4139 4140 /* 4141 * Don't clear the needs_recovery flag if we failed to flush 4142 * the journal. 4143 */ 4144 error = jbd2_journal_flush(journal); 4145 if (error < 0) 4146 goto out; 4147 4148 /* Journal blocked and flushed, clear needs_recovery flag. */ 4149 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4150 error = ext4_commit_super(sb, 1); 4151 out: 4152 /* we rely on s_frozen to stop further updates */ 4153 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4154 return error; 4155 } 4156 4157 /* 4158 * Called by LVM after the snapshot is done. We need to reset the RECOVER 4159 * flag here, even though the filesystem is not technically dirty yet. 4160 */ 4161 static int ext4_unfreeze(struct super_block *sb) 4162 { 4163 if (sb->s_flags & MS_RDONLY) 4164 return 0; 4165 4166 lock_super(sb); 4167 /* Reset the needs_recovery flag before the fs is unlocked. */ 4168 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4169 ext4_commit_super(sb, 1); 4170 unlock_super(sb); 4171 return 0; 4172 } 4173 4174 /* 4175 * Structure to save mount options for ext4_remount's benefit 4176 */ 4177 struct ext4_mount_options { 4178 unsigned long s_mount_opt; 4179 unsigned long s_mount_opt2; 4180 uid_t s_resuid; 4181 gid_t s_resgid; 4182 unsigned long s_commit_interval; 4183 u32 s_min_batch_time, s_max_batch_time; 4184 #ifdef CONFIG_QUOTA 4185 int s_jquota_fmt; 4186 char *s_qf_names[MAXQUOTAS]; 4187 #endif 4188 }; 4189 4190 static int ext4_remount(struct super_block *sb, int *flags, char *data) 4191 { 4192 struct ext4_super_block *es; 4193 struct ext4_sb_info *sbi = EXT4_SB(sb); 4194 ext4_fsblk_t n_blocks_count = 0; 4195 unsigned long old_sb_flags; 4196 struct ext4_mount_options old_opts; 4197 int enable_quota = 0; 4198 ext4_group_t g; 4199 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4200 int err; 4201 #ifdef CONFIG_QUOTA 4202 int i; 4203 #endif 4204 char *orig_data = kstrdup(data, GFP_KERNEL); 4205 4206 /* Store the original options */ 4207 lock_super(sb); 4208 old_sb_flags = sb->s_flags; 4209 old_opts.s_mount_opt = sbi->s_mount_opt; 4210 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4211 old_opts.s_resuid = sbi->s_resuid; 4212 old_opts.s_resgid = sbi->s_resgid; 4213 old_opts.s_commit_interval = sbi->s_commit_interval; 4214 old_opts.s_min_batch_time = sbi->s_min_batch_time; 4215 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4216 #ifdef CONFIG_QUOTA 4217 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4218 for (i = 0; i < MAXQUOTAS; i++) 4219 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 4220 #endif 4221 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4222 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4223 4224 /* 4225 * Allow the "check" option to be passed as a remount option. 4226 */ 4227 if (!parse_options(data, sb, NULL, &journal_ioprio, 4228 &n_blocks_count, 1)) { 4229 err = -EINVAL; 4230 goto restore_opts; 4231 } 4232 4233 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4234 ext4_abort(sb, "Abort forced by user"); 4235 4236 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 4237 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 4238 4239 es = sbi->s_es; 4240 4241 if (sbi->s_journal) { 4242 ext4_init_journal_params(sb, sbi->s_journal); 4243 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4244 } 4245 4246 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 4247 n_blocks_count > ext4_blocks_count(es)) { 4248 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4249 err = -EROFS; 4250 goto restore_opts; 4251 } 4252 4253 if (*flags & MS_RDONLY) { 4254 err = dquot_suspend(sb, -1); 4255 if (err < 0) 4256 goto restore_opts; 4257 4258 /* 4259 * First of all, the unconditional stuff we have to do 4260 * to disable replay of the journal when we next remount 4261 */ 4262 sb->s_flags |= MS_RDONLY; 4263 4264 /* 4265 * OK, test if we are remounting a valid rw partition 4266 * readonly, and if so set the rdonly flag and then 4267 * mark the partition as valid again. 4268 */ 4269 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 4270 (sbi->s_mount_state & EXT4_VALID_FS)) 4271 es->s_state = cpu_to_le16(sbi->s_mount_state); 4272 4273 if (sbi->s_journal) 4274 ext4_mark_recovery_complete(sb, es); 4275 } else { 4276 /* Make sure we can mount this feature set readwrite */ 4277 if (!ext4_feature_set_ok(sb, 0)) { 4278 err = -EROFS; 4279 goto restore_opts; 4280 } 4281 /* 4282 * Make sure the group descriptor checksums 4283 * are sane. If they aren't, refuse to remount r/w. 4284 */ 4285 for (g = 0; g < sbi->s_groups_count; g++) { 4286 struct ext4_group_desc *gdp = 4287 ext4_get_group_desc(sb, g, NULL); 4288 4289 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 4290 ext4_msg(sb, KERN_ERR, 4291 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4292 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 4293 le16_to_cpu(gdp->bg_checksum)); 4294 err = -EINVAL; 4295 goto restore_opts; 4296 } 4297 } 4298 4299 /* 4300 * If we have an unprocessed orphan list hanging 4301 * around from a previously readonly bdev mount, 4302 * require a full umount/remount for now. 4303 */ 4304 if (es->s_last_orphan) { 4305 ext4_msg(sb, KERN_WARNING, "Couldn't " 4306 "remount RDWR because of unprocessed " 4307 "orphan inode list. Please " 4308 "umount/remount instead"); 4309 err = -EINVAL; 4310 goto restore_opts; 4311 } 4312 4313 /* 4314 * Mounting a RDONLY partition read-write, so reread 4315 * and store the current valid flag. (It may have 4316 * been changed by e2fsck since we originally mounted 4317 * the partition.) 4318 */ 4319 if (sbi->s_journal) 4320 ext4_clear_journal_err(sb, es); 4321 sbi->s_mount_state = le16_to_cpu(es->s_state); 4322 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 4323 goto restore_opts; 4324 if (!ext4_setup_super(sb, es, 0)) 4325 sb->s_flags &= ~MS_RDONLY; 4326 enable_quota = 1; 4327 } 4328 } 4329 4330 /* 4331 * Reinitialize lazy itable initialization thread based on 4332 * current settings 4333 */ 4334 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) 4335 ext4_unregister_li_request(sb); 4336 else { 4337 ext4_group_t first_not_zeroed; 4338 first_not_zeroed = ext4_has_uninit_itable(sb); 4339 ext4_register_li_request(sb, first_not_zeroed); 4340 } 4341 4342 ext4_setup_system_zone(sb); 4343 if (sbi->s_journal == NULL) 4344 ext4_commit_super(sb, 1); 4345 4346 #ifdef CONFIG_QUOTA 4347 /* Release old quota file names */ 4348 for (i = 0; i < MAXQUOTAS; i++) 4349 if (old_opts.s_qf_names[i] && 4350 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 4351 kfree(old_opts.s_qf_names[i]); 4352 #endif 4353 unlock_super(sb); 4354 if (enable_quota) 4355 dquot_resume(sb, -1); 4356 4357 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 4358 kfree(orig_data); 4359 return 0; 4360 4361 restore_opts: 4362 sb->s_flags = old_sb_flags; 4363 sbi->s_mount_opt = old_opts.s_mount_opt; 4364 sbi->s_mount_opt2 = old_opts.s_mount_opt2; 4365 sbi->s_resuid = old_opts.s_resuid; 4366 sbi->s_resgid = old_opts.s_resgid; 4367 sbi->s_commit_interval = old_opts.s_commit_interval; 4368 sbi->s_min_batch_time = old_opts.s_min_batch_time; 4369 sbi->s_max_batch_time = old_opts.s_max_batch_time; 4370 #ifdef CONFIG_QUOTA 4371 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4372 for (i = 0; i < MAXQUOTAS; i++) { 4373 if (sbi->s_qf_names[i] && 4374 old_opts.s_qf_names[i] != sbi->s_qf_names[i]) 4375 kfree(sbi->s_qf_names[i]); 4376 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4377 } 4378 #endif 4379 unlock_super(sb); 4380 kfree(orig_data); 4381 return err; 4382 } 4383 4384 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4385 { 4386 struct super_block *sb = dentry->d_sb; 4387 struct ext4_sb_info *sbi = EXT4_SB(sb); 4388 struct ext4_super_block *es = sbi->s_es; 4389 u64 fsid; 4390 4391 if (test_opt(sb, MINIX_DF)) { 4392 sbi->s_overhead_last = 0; 4393 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 4394 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 4395 ext4_fsblk_t overhead = 0; 4396 4397 /* 4398 * Compute the overhead (FS structures). This is constant 4399 * for a given filesystem unless the number of block groups 4400 * changes so we cache the previous value until it does. 4401 */ 4402 4403 /* 4404 * All of the blocks before first_data_block are 4405 * overhead 4406 */ 4407 overhead = le32_to_cpu(es->s_first_data_block); 4408 4409 /* 4410 * Add the overhead attributed to the superblock and 4411 * block group descriptors. If the sparse superblocks 4412 * feature is turned on, then not all groups have this. 4413 */ 4414 for (i = 0; i < ngroups; i++) { 4415 overhead += ext4_bg_has_super(sb, i) + 4416 ext4_bg_num_gdb(sb, i); 4417 cond_resched(); 4418 } 4419 4420 /* 4421 * Every block group has an inode bitmap, a block 4422 * bitmap, and an inode table. 4423 */ 4424 overhead += ngroups * (2 + sbi->s_itb_per_group); 4425 sbi->s_overhead_last = overhead; 4426 smp_wmb(); 4427 sbi->s_blocks_last = ext4_blocks_count(es); 4428 } 4429 4430 buf->f_type = EXT4_SUPER_MAGIC; 4431 buf->f_bsize = sb->s_blocksize; 4432 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4433 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4434 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4435 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4436 if (buf->f_bfree < ext4_r_blocks_count(es)) 4437 buf->f_bavail = 0; 4438 buf->f_files = le32_to_cpu(es->s_inodes_count); 4439 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 4440 buf->f_namelen = EXT4_NAME_LEN; 4441 fsid = le64_to_cpup((void *)es->s_uuid) ^ 4442 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 4443 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 4444 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 4445 4446 return 0; 4447 } 4448 4449 /* Helper function for writing quotas on sync - we need to start transaction 4450 * before quota file is locked for write. Otherwise the are possible deadlocks: 4451 * Process 1 Process 2 4452 * ext4_create() quota_sync() 4453 * jbd2_journal_start() write_dquot() 4454 * dquot_initialize() down(dqio_mutex) 4455 * down(dqio_mutex) jbd2_journal_start() 4456 * 4457 */ 4458 4459 #ifdef CONFIG_QUOTA 4460 4461 static inline struct inode *dquot_to_inode(struct dquot *dquot) 4462 { 4463 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 4464 } 4465 4466 static int ext4_write_dquot(struct dquot *dquot) 4467 { 4468 int ret, err; 4469 handle_t *handle; 4470 struct inode *inode; 4471 4472 inode = dquot_to_inode(dquot); 4473 handle = ext4_journal_start(inode, 4474 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 4475 if (IS_ERR(handle)) 4476 return PTR_ERR(handle); 4477 ret = dquot_commit(dquot); 4478 err = ext4_journal_stop(handle); 4479 if (!ret) 4480 ret = err; 4481 return ret; 4482 } 4483 4484 static int ext4_acquire_dquot(struct dquot *dquot) 4485 { 4486 int ret, err; 4487 handle_t *handle; 4488 4489 handle = ext4_journal_start(dquot_to_inode(dquot), 4490 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 4491 if (IS_ERR(handle)) 4492 return PTR_ERR(handle); 4493 ret = dquot_acquire(dquot); 4494 err = ext4_journal_stop(handle); 4495 if (!ret) 4496 ret = err; 4497 return ret; 4498 } 4499 4500 static int ext4_release_dquot(struct dquot *dquot) 4501 { 4502 int ret, err; 4503 handle_t *handle; 4504 4505 handle = ext4_journal_start(dquot_to_inode(dquot), 4506 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 4507 if (IS_ERR(handle)) { 4508 /* Release dquot anyway to avoid endless cycle in dqput() */ 4509 dquot_release(dquot); 4510 return PTR_ERR(handle); 4511 } 4512 ret = dquot_release(dquot); 4513 err = ext4_journal_stop(handle); 4514 if (!ret) 4515 ret = err; 4516 return ret; 4517 } 4518 4519 static int ext4_mark_dquot_dirty(struct dquot *dquot) 4520 { 4521 /* Are we journaling quotas? */ 4522 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 4523 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 4524 dquot_mark_dquot_dirty(dquot); 4525 return ext4_write_dquot(dquot); 4526 } else { 4527 return dquot_mark_dquot_dirty(dquot); 4528 } 4529 } 4530 4531 static int ext4_write_info(struct super_block *sb, int type) 4532 { 4533 int ret, err; 4534 handle_t *handle; 4535 4536 /* Data block + inode block */ 4537 handle = ext4_journal_start(sb->s_root->d_inode, 2); 4538 if (IS_ERR(handle)) 4539 return PTR_ERR(handle); 4540 ret = dquot_commit_info(sb, type); 4541 err = ext4_journal_stop(handle); 4542 if (!ret) 4543 ret = err; 4544 return ret; 4545 } 4546 4547 /* 4548 * Turn on quotas during mount time - we need to find 4549 * the quota file and such... 4550 */ 4551 static int ext4_quota_on_mount(struct super_block *sb, int type) 4552 { 4553 return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], 4554 EXT4_SB(sb)->s_jquota_fmt, type); 4555 } 4556 4557 /* 4558 * Standard function to be called on quota_on 4559 */ 4560 static int ext4_quota_on(struct super_block *sb, int type, int format_id, 4561 struct path *path) 4562 { 4563 int err; 4564 4565 if (!test_opt(sb, QUOTA)) 4566 return -EINVAL; 4567 4568 /* Quotafile not on the same filesystem? */ 4569 if (path->mnt->mnt_sb != sb) 4570 return -EXDEV; 4571 /* Journaling quota? */ 4572 if (EXT4_SB(sb)->s_qf_names[type]) { 4573 /* Quotafile not in fs root? */ 4574 if (path->dentry->d_parent != sb->s_root) 4575 ext4_msg(sb, KERN_WARNING, 4576 "Quota file not on filesystem root. " 4577 "Journaled quota will not work"); 4578 } 4579 4580 /* 4581 * When we journal data on quota file, we have to flush journal to see 4582 * all updates to the file when we bypass pagecache... 4583 */ 4584 if (EXT4_SB(sb)->s_journal && 4585 ext4_should_journal_data(path->dentry->d_inode)) { 4586 /* 4587 * We don't need to lock updates but journal_flush() could 4588 * otherwise be livelocked... 4589 */ 4590 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 4591 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 4592 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4593 if (err) 4594 return err; 4595 } 4596 4597 return dquot_quota_on(sb, type, format_id, path); 4598 } 4599 4600 static int ext4_quota_off(struct super_block *sb, int type) 4601 { 4602 /* Force all delayed allocation blocks to be allocated. 4603 * Caller already holds s_umount sem */ 4604 if (test_opt(sb, DELALLOC)) 4605 sync_filesystem(sb); 4606 4607 return dquot_quota_off(sb, type); 4608 } 4609 4610 /* Read data from quotafile - avoid pagecache and such because we cannot afford 4611 * acquiring the locks... As quota files are never truncated and quota code 4612 * itself serializes the operations (and noone else should touch the files) 4613 * we don't have to be afraid of races */ 4614 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 4615 size_t len, loff_t off) 4616 { 4617 struct inode *inode = sb_dqopt(sb)->files[type]; 4618 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4619 int err = 0; 4620 int offset = off & (sb->s_blocksize - 1); 4621 int tocopy; 4622 size_t toread; 4623 struct buffer_head *bh; 4624 loff_t i_size = i_size_read(inode); 4625 4626 if (off > i_size) 4627 return 0; 4628 if (off+len > i_size) 4629 len = i_size-off; 4630 toread = len; 4631 while (toread > 0) { 4632 tocopy = sb->s_blocksize - offset < toread ? 4633 sb->s_blocksize - offset : toread; 4634 bh = ext4_bread(NULL, inode, blk, 0, &err); 4635 if (err) 4636 return err; 4637 if (!bh) /* A hole? */ 4638 memset(data, 0, tocopy); 4639 else 4640 memcpy(data, bh->b_data+offset, tocopy); 4641 brelse(bh); 4642 offset = 0; 4643 toread -= tocopy; 4644 data += tocopy; 4645 blk++; 4646 } 4647 return len; 4648 } 4649 4650 /* Write to quotafile (we know the transaction is already started and has 4651 * enough credits) */ 4652 static ssize_t ext4_quota_write(struct super_block *sb, int type, 4653 const char *data, size_t len, loff_t off) 4654 { 4655 struct inode *inode = sb_dqopt(sb)->files[type]; 4656 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 4657 int err = 0; 4658 int offset = off & (sb->s_blocksize - 1); 4659 struct buffer_head *bh; 4660 handle_t *handle = journal_current_handle(); 4661 4662 if (EXT4_SB(sb)->s_journal && !handle) { 4663 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4664 " cancelled because transaction is not started", 4665 (unsigned long long)off, (unsigned long long)len); 4666 return -EIO; 4667 } 4668 /* 4669 * Since we account only one data block in transaction credits, 4670 * then it is impossible to cross a block boundary. 4671 */ 4672 if (sb->s_blocksize - offset < len) { 4673 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" 4674 " cancelled because not block aligned", 4675 (unsigned long long)off, (unsigned long long)len); 4676 return -EIO; 4677 } 4678 4679 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); 4680 bh = ext4_bread(handle, inode, blk, 1, &err); 4681 if (!bh) 4682 goto out; 4683 err = ext4_journal_get_write_access(handle, bh); 4684 if (err) { 4685 brelse(bh); 4686 goto out; 4687 } 4688 lock_buffer(bh); 4689 memcpy(bh->b_data+offset, data, len); 4690 flush_dcache_page(bh->b_page); 4691 unlock_buffer(bh); 4692 err = ext4_handle_dirty_metadata(handle, NULL, bh); 4693 brelse(bh); 4694 out: 4695 if (err) { 4696 mutex_unlock(&inode->i_mutex); 4697 return err; 4698 } 4699 if (inode->i_size < off + len) { 4700 i_size_write(inode, off + len); 4701 EXT4_I(inode)->i_disksize = inode->i_size; 4702 } 4703 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4704 ext4_mark_inode_dirty(handle, inode); 4705 mutex_unlock(&inode->i_mutex); 4706 return len; 4707 } 4708 4709 #endif 4710 4711 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 4712 const char *dev_name, void *data) 4713 { 4714 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); 4715 } 4716 4717 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4718 static struct file_system_type ext2_fs_type = { 4719 .owner = THIS_MODULE, 4720 .name = "ext2", 4721 .mount = ext4_mount, 4722 .kill_sb = kill_block_super, 4723 .fs_flags = FS_REQUIRES_DEV, 4724 }; 4725 4726 static inline void register_as_ext2(void) 4727 { 4728 int err = register_filesystem(&ext2_fs_type); 4729 if (err) 4730 printk(KERN_WARNING 4731 "EXT4-fs: Unable to register as ext2 (%d)\n", err); 4732 } 4733 4734 static inline void unregister_as_ext2(void) 4735 { 4736 unregister_filesystem(&ext2_fs_type); 4737 } 4738 MODULE_ALIAS("ext2"); 4739 #else 4740 static inline void register_as_ext2(void) { } 4741 static inline void unregister_as_ext2(void) { } 4742 #endif 4743 4744 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 4745 static inline void register_as_ext3(void) 4746 { 4747 int err = register_filesystem(&ext3_fs_type); 4748 if (err) 4749 printk(KERN_WARNING 4750 "EXT4-fs: Unable to register as ext3 (%d)\n", err); 4751 } 4752 4753 static inline void unregister_as_ext3(void) 4754 { 4755 unregister_filesystem(&ext3_fs_type); 4756 } 4757 MODULE_ALIAS("ext3"); 4758 #else 4759 static inline void register_as_ext3(void) { } 4760 static inline void unregister_as_ext3(void) { } 4761 #endif 4762 4763 static struct file_system_type ext4_fs_type = { 4764 .owner = THIS_MODULE, 4765 .name = "ext4", 4766 .mount = ext4_mount, 4767 .kill_sb = kill_block_super, 4768 .fs_flags = FS_REQUIRES_DEV, 4769 }; 4770 4771 int __init ext4_init_feat_adverts(void) 4772 { 4773 struct ext4_features *ef; 4774 int ret = -ENOMEM; 4775 4776 ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); 4777 if (!ef) 4778 goto out; 4779 4780 ef->f_kobj.kset = ext4_kset; 4781 init_completion(&ef->f_kobj_unregister); 4782 ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, 4783 "features"); 4784 if (ret) { 4785 kfree(ef); 4786 goto out; 4787 } 4788 4789 ext4_feat = ef; 4790 ret = 0; 4791 out: 4792 return ret; 4793 } 4794 4795 static int __init ext4_init_fs(void) 4796 { 4797 int err; 4798 4799 ext4_check_flag_values(); 4800 err = ext4_init_pageio(); 4801 if (err) 4802 return err; 4803 err = ext4_init_system_zone(); 4804 if (err) 4805 goto out5; 4806 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4807 if (!ext4_kset) 4808 goto out4; 4809 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4810 4811 err = ext4_init_feat_adverts(); 4812 4813 err = ext4_init_mballoc(); 4814 if (err) 4815 goto out3; 4816 4817 err = ext4_init_xattr(); 4818 if (err) 4819 goto out2; 4820 err = init_inodecache(); 4821 if (err) 4822 goto out1; 4823 register_as_ext2(); 4824 register_as_ext3(); 4825 err = register_filesystem(&ext4_fs_type); 4826 if (err) 4827 goto out; 4828 4829 ext4_li_info = NULL; 4830 mutex_init(&ext4_li_mtx); 4831 return 0; 4832 out: 4833 unregister_as_ext2(); 4834 unregister_as_ext3(); 4835 destroy_inodecache(); 4836 out1: 4837 ext4_exit_xattr(); 4838 out2: 4839 ext4_exit_mballoc(); 4840 out3: 4841 kfree(ext4_feat); 4842 remove_proc_entry("fs/ext4", NULL); 4843 kset_unregister(ext4_kset); 4844 out4: 4845 ext4_exit_system_zone(); 4846 out5: 4847 ext4_exit_pageio(); 4848 return err; 4849 } 4850 4851 static void __exit ext4_exit_fs(void) 4852 { 4853 ext4_destroy_lazyinit_thread(); 4854 unregister_as_ext2(); 4855 unregister_as_ext3(); 4856 unregister_filesystem(&ext4_fs_type); 4857 destroy_inodecache(); 4858 ext4_exit_xattr(); 4859 ext4_exit_mballoc(); 4860 remove_proc_entry("fs/ext4", NULL); 4861 kset_unregister(ext4_kset); 4862 ext4_exit_system_zone(); 4863 ext4_exit_pageio(); 4864 } 4865 4866 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 4867 MODULE_DESCRIPTION("Fourth Extended Filesystem"); 4868 MODULE_LICENSE("GPL"); 4869 module_init(ext4_init_fs) 4870 module_exit(ext4_exit_fs) 4871