/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/cleancache.h>
#include <asm/uaccess.h>

#include <linux/kthread.h>
#include <linux/freezer.h>

#include "ext4.h"
#include "ext4_extents.h"	/* Needed for trace points definition */
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"

#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat;
static int ext4_mballoc_ready;

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
				   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
				 const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);

#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext2",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif

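/*
 * As with ext2 above: if the real ext3 driver is not built, let ext4
 * service "ext3" mounts too.  The MODULE_ALIAS_FS()/MODULE_ALIAS()
 * entries make mount -t ext3 and module autoloading reach ext4_mount().
 */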
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && \
		defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext3",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
#define IS_EXT3_SB(sb) (0)
#endif

static int ext4_verify_csum_type(struct super_block *sb,
				 struct ext4_super_block *es)
{
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
		return 1;

	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}

static __le32 ext4_superblock_csum(struct super_block *sb,
				   struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct ext4_super_block, s_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, ~0, (char *)es, offset);

	return cpu_to_le32(csum);
}

static int ext4_superblock_csum_verify(struct super_block *sb,
				       struct ext4_super_block *es)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return es->s_checksum == ext4_superblock_csum(sb, es);
}

void ext4_superblock_csum_set(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (!ext4_has_metadata_csum(sb))
		return;

	es->s_checksum = ext4_superblock_csum(sb, es);
}

void *ext4_kvmalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = kmalloc(size, flags | __GFP_NOWARN);
	if (!ret)
		ret = __vmalloc(size, flags, PAGE_KERNEL);
	return ret;
}

void *ext4_kvzalloc(size_t size, gfp_t flags)
{
	void *ret;

	ret = kzalloc(size, flags | __GFP_NOWARN);
	if (!ret)
		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
	return ret;
}

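/*
 * Accessors for block group descriptor fields.  With the 64bit feature
 * (descriptor size >= EXT4_MIN_DESC_SIZE_64BIT) each on-disk value is
 * split into _lo/_hi halves; the getters below splice the halves back
 * together, and the *_set() counterparts split a value when storing it.
 */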
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_block_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_table(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_table_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

__u32 ext4_free_group_clusters(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
}

__u32 ext4_free_inodes_count(struct super_block *sb,
			     struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
}

__u32 ext4_used_dirs_count(struct super_block *sb,
			   struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
}

__u32 ext4_itable_unused_count(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_itable_unused_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
}

void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}

void ext4_used_dirs_set(struct super_block *sb,
			struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

void ext4_itable_unused_set(struct super_block *sb,
			    struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

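/*
 * Record the most recent (and, if not yet set, the first) error in the
 * superblock so e2fsck and administrators can see when and where the
 * filesystem last hit trouble.  __save_error_info() only updates the
 * in-memory superblock; save_error_info() also writes it out
 * synchronously via ext4_commit_super().
 */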
static void __save_error_info(struct super_block *sb, const char *func,
			      unsigned int line)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	es->s_last_error_time = cpu_to_le32(get_seconds());
	strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
	es->s_last_error_line = cpu_to_le32(line);
	if (!es->s_first_error_time) {
		es->s_first_error_time = es->s_last_error_time;
		strncpy(es->s_first_error_func, func,
			sizeof(es->s_first_error_func));
		es->s_first_error_line = cpu_to_le32(line);
		es->s_first_error_ino = es->s_last_error_ino;
		es->s_first_error_block = es->s_last_error_block;
	}
	/*
	 * Start the daily error reporting function if it hasn't been
	 * started already
	 */
	if (!es->s_error_count)
		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
	le32_add_cpu(&es->s_error_count, 1);
}

static void save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	__save_error_info(sb, func, line);
	ext4_commit_super(sb, 1);
}

static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block		*sb = journal->j_private;
	struct ext4_sb_info		*sbi = EXT4_SB(sb);
	int				error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry	*jce;

	BUG_ON(txn->t_state == T_FINISHED);
	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}

/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock.  That is not possible on ext4, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
 */

static void ext4_handle_error(struct super_block *sb)
{
	if (sb->s_flags & MS_RDONLY)
		return;

	if (!test_opt(sb, ERRORS_CONT)) {
		journal_t *journal = EXT4_SB(sb)->s_journal;

		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}
	if (test_opt(sb, ERRORS_RO)) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		/*
		 * Make sure updated value of ->s_mount_flags will be visible
		 * before ->s_flags update
		 */
		smp_wmb();
		sb->s_flags |= MS_RDONLY;
	}
	if (test_opt(sb, ERRORS_PANIC))
		panic("EXT4-fs (device %s): panic forced after error\n",
			sb->s_id);
}

#define ext4_error_ratelimit(sb)					\
		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
			     "EXT4-fs error")

void __ext4_error(struct super_block *sb, const char *function,
		  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
		       sb->s_id, function, line, current->comm, &vaf);
		va_end(args);
	}
	save_error_info(sb, function, line);
	ext4_handle_error(sb);
}

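/*
 * The inode/file variants below additionally record the affected inode,
 * block and (for files) the pathname in the superblock error fields
 * before reporting.  Call sites normally use the wrapper macros in
 * ext4.h (e.g. ext4_error()), which supply __func__ and __LINE__.
 */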
void __ext4_error_inode(struct inode *inode, const char *function,
			unsigned int line, ext4_fsblk_t block,
			const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;

	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	es->s_last_error_block = cpu_to_le64(block);
	if (ext4_error_ratelimit(inode->i_sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: block %llu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, &vaf);
		else
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, &vaf);
		va_end(args);
	}
	save_error_info(inode->i_sb, function, line);
	ext4_handle_error(inode->i_sb);
}

void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct ext4_super_block *es;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	es = EXT4_SB(inode->i_sb)->s_es;
	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = d_path(&(file->f_path), pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	save_error_info(inode->i_sb, function, line);
	ext4_handle_error(inode->i_sb);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
			      char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}

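/*
 * Note: callers normally reach __ext4_std_error() through the
 * ext4_std_error() macro in ext4.h, which passes __func__/__LINE__
 * and is a no-op when errno is zero.
 */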
/* __ext4_std_error decodes expected errors from journaling functions
 * automatically and invokes the appropriate error response. */

void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
{
	char nbuf[16];
	const char *errstr;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL &&
	    (sb->s_flags & MS_RDONLY))
		return;

	if (ext4_error_ratelimit(sb)) {
		errstr = ext4_decode_error(sb, errno, nbuf);
		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
		       sb->s_id, function, line, errstr);
	}

	save_error_info(sb, function, line);
	ext4_handle_error(sb);
}

/*
 * ext4_abort is a much stronger failure handler than ext4_error.  The
 * abort function may be used to deal with unrecoverable failures such
 * as journal IO errors or ENOMEM at a critical moment in log management.
 *
 * We unconditionally force the filesystem into an ABORT|READONLY state,
 * unless the error response on the fs has been set to panic in which
 * case we take the easy way out and panic immediately.
 */

void __ext4_abort(struct super_block *sb, const char *function,
		  unsigned int line, const char *fmt, ...)
{
	va_list args;

	save_error_info(sb, function, line);
	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
	       function, line);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if ((sb->s_flags & MS_RDONLY) == 0) {
		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
		/*
		 * Make sure updated value of ->s_mount_flags will be visible
		 * before ->s_flags update
		 */
		smp_wmb();
		sb->s_flags |= MS_RDONLY;
		if (EXT4_SB(sb)->s_journal)
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
		save_error_info(sb, function, line);
	}
	if (test_opt(sb, ERRORS_PANIC))
		panic("EXT4-fs panic from previous error\n");
}

void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	va_end(args);
}

void __ext4_warning(struct super_block *sb, const char *function,
		    unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			  "EXT4-fs warning"))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
	va_end(args);
}

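/*
 * Report an error detected while the caller holds a block group's
 * bitlock.  In the ERRORS_CONT case the superblock is committed without
 * dropping the lock; otherwise the group lock is released around
 * ext4_handle_error() (see the __releases/__acquires annotations below)
 * and reacquired before returning.
 */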
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	es->s_last_error_ino = cpu_to_le32(ino);
	es->s_last_error_block = cpu_to_le64(block);
	__save_error_info(sb, function, line);

	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		ext4_commit_super(sb, 0);
		return;
	}

	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might want to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}

void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

/*
 * Open the external journal device
 */
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
	ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
			__bdevname(dev, b), PTR_ERR(bdev));
	return NULL;
}

/*
 * Release the journal device
 */
static void ext4_blkdev_put(struct block_device *bdev)
{
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
	struct block_device *bdev;
	bdev = sbi->journal_bdev;
	if (bdev) {
		ext4_blkdev_put(bdev);
		sbi->journal_bdev = NULL;
	}
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}

static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

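/*
 * Final unmount path, called by the VFS once the last reference to the
 * superblock is gone: flush and destroy the journal, write back a clean
 * superblock if we are read-write, and release everything that was set
 * up at mount time (procfs/kobject entries, group descriptors, per-sb
 * counters, the MMP kthread, caches and locks).
 */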
static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int i, err;

	ext4_unregister_li_request(sb);
	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);

	flush_workqueue(sbi->rsv_conversion_wq);
	destroy_workqueue(sbi->rsv_conversion_wq);

	if (sbi->s_journal) {
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if (err < 0)
			ext4_abort(sb, "Couldn't clean up the journal");
	}

	ext4_es_unregister_shrinker(sbi);
	del_timer_sync(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);
	ext4_xattr_put_super(sb);

	if (!(sb->s_flags & MS_RDONLY)) {
		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!(sb->s_flags & MS_RDONLY))
		ext4_commit_super(sb, 1);

	if (sbi->s_proc) {
		remove_proc_entry("options", sbi->s_proc);
		remove_proc_entry(sb->s_id, ext4_proc_root);
	}
	kobject_del(&sbi->s_kobj);

	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	kvfree(sbi->s_group_desc);
	kvfree(sbi->s_flex_groups);
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

	invalidate_bdev(sb->s_bdev);
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * have been hotswapped, and it breaks the `ro-after' testing
		 * code.
		 */
		sync_blockdev(sbi->journal_bdev);
		invalidate_bdev(sbi->journal_bdev);
		ext4_blkdev_remove(sbi);
	}
	if (sbi->s_mb_cache) {
		ext4_xattr_destroy_cache(sbi->s_mb_cache);
		sbi->s_mb_cache = NULL;
	}
	if (sbi->s_mmp_tsk)
		kthread_stop(sbi->s_mmp_tsk);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	kfree(sbi);
}

static struct kmem_cache *ext4_inode_cachep;

/*
 * Called inside transaction, so use GFP_NOFS
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
	struct ext4_inode_info *ei;

	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	ei->vfs_inode.i_version = 1;
	spin_lock_init(&ei->i_raw_lock);
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	spin_lock_init(&ei->i_prealloc_lock);
	ext4_es_init_tree(&ei->i_es_tree);
	rwlock_init(&ei->i_es_lock);
	INIT_LIST_HEAD(&ei->i_es_list);
	ei->i_es_all_nr = 0;
	ei->i_es_shk_nr = 0;
	ei->i_es_shrink_lblk = 0;
	ei->i_reserved_data_blocks = 0;
	ei->i_reserved_meta_blocks = 0;
	ei->i_allocated_meta_blocks = 0;
	ei->i_da_metadata_calc_len = 0;
	ei->i_da_metadata_calc_last_lblock = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
	ei->jinode = NULL;
	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
	spin_lock_init(&ei->i_completed_io_lock);
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
	atomic_set(&ei->i_ioend_count, 0);
	atomic_set(&ei->i_unwritten, 0);
	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
#endif

	return &ei->vfs_inode;
}

static int ext4_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}

static void ext4_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

static void ext4_destroy_inode(struct inode *inode)
{
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT4_I(inode), sizeof(struct ext4_inode_info),
				true);
		dump_stack();
	}
	call_rcu(&inode->i_rcu, ext4_i_callback);
}

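/*
 * Slab constructor: runs once when an ext4_inode_info object is first
 * created in the cache, not on every allocation, so only state that
 * survives from one inode lifetime to the next (list heads, locks) may
 * be initialized here.
 */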
static void init_once(void *foo)
{
	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;

	INIT_LIST_HEAD(&ei->i_orphan);
	init_rwsem(&ei->xattr_sem);
	init_rwsem(&ei->i_data_sem);
	inode_init_once(&ei->vfs_inode);
}

static int __init init_inodecache(void)
{
	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
					     sizeof(struct ext4_inode_info),
					     0, (SLAB_RECLAIM_ACCOUNT|
						SLAB_MEM_SPREAD),
					     init_once);
	if (ext4_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_inode_cachep);
}

void ext4_clear_inode(struct inode *inode)
{
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	dquot_drop(inode);
	ext4_discard_preallocations(inode);
	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
	if (EXT4_I(inode)->jinode) {
		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
					       EXT4_I(inode)->jinode);
		jbd2_free_inode(EXT4_I(inode)->jinode);
		EXT4_I(inode)->jinode = NULL;
	}
}

static struct inode *ext4_nfs_get_inode(struct super_block *sb,
					u64 ino, u32 generation)
{
	struct inode *inode;

	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
		return ERR_PTR(-ESTALE);
	if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
		return ERR_PTR(-ESTALE);

	/* iget isn't really right if the inode is currently unallocated!!
	 *
	 * ext4_read_inode will return a bad_inode if the inode had been
	 * deleted, so we should be safe.
	 *
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = ext4_iget_normal(sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (generation && inode->i_generation != generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

/*
 * Try to release metadata pages (indirect blocks, directories) which are
 * mapped via the block device.  Since these pages could have journal heads
 * which would prevent try_to_free_buffers() from freeing them, we must use
 * jbd2 layer's try_to_free_buffers() function to release them.
 */
static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
				 gfp_t wait)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;

	WARN_ON(PageChecked(page));
	if (!page_has_buffers(page))
		return 0;
	if (journal)
		return jbd2_journal_try_to_free_buffers(journal, page,
							wait & ~__GFP_WAIT);
	return try_to_free_buffers(page);
}

#ifdef CONFIG_QUOTA
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))

static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);

static struct dquot **ext4_get_dquots(struct inode *inode)
{
	return EXT4_I(inode)->i_dquot;
}

static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space = ext4_get_reserved_space,
	.write_dquot	= ext4_write_dquot,
	.acquire_dquot	= ext4_acquire_dquot,
	.release_dquot	= ext4_release_dquot,
	.mark_dirty	= ext4_mark_dquot_dirty,
	.write_info	= ext4_write_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
};

static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};
#endif

static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.remount_fs	= ext4_remount,
	.show_options	= ext4_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
	.get_dquots	= ext4_get_dquots,
#endif
	.bdev_try_to_free_page = bdev_try_to_free_page,
};

static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
};

enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
	Opt_lazytime, Opt_nolazytime,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum,
};

static const match_table_t tokens = {
	{Opt_bsd_df, "bsddf"},
	{Opt_minix_df, "minixdf"},
	{Opt_grpid, "grpid"},
	{Opt_grpid, "bsdgroups"},
	{Opt_nogrpid, "nogrpid"},
	{Opt_nogrpid, "sysvgroups"},
	{Opt_resgid, "resgid=%u"},
	{Opt_resuid, "resuid=%u"},
	{Opt_sb, "sb=%u"},
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_nouid32, "nouid32"},
	{Opt_debug, "debug"},
	{Opt_removed, "oldalloc"},
	{Opt_removed, "orlov"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_noload, "norecovery"},
	{Opt_noload, "noload"},
	{Opt_removed, "nobh"},
	{Opt_removed, "bh"},
	{Opt_commit, "commit=%u"},
	{Opt_min_batch_time, "min_batch_time=%u"},
	{Opt_max_batch_time, "max_batch_time=%u"},
	{Opt_journal_dev, "journal_dev=%u"},
	{Opt_journal_path, "journal_path=%s"},
	{Opt_journal_checksum, "journal_checksum"},
	{Opt_nojournal_checksum, "nojournal_checksum"},
	{Opt_journal_async_commit, "journal_async_commit"},
	{Opt_abort, "abort"},
	{Opt_data_journal, "data=journal"},
	{Opt_data_ordered, "data=ordered"},
	{Opt_data_writeback, "data=writeback"},
	{Opt_data_err_abort, "data_err=abort"},
	{Opt_data_err_ignore, "data_err=ignore"},
	{Opt_offusrjquota, "usrjquota="},
	{Opt_usrjquota, "usrjquota=%s"},
	{Opt_offgrpjquota, "grpjquota="},
	{Opt_grpjquota, "grpjquota=%s"},
	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
	{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
	{Opt_grpquota, "grpquota"},
	{Opt_noquota, "noquota"},
	{Opt_quota, "quota"},
	{Opt_usrquota, "usrquota"},
	{Opt_barrier, "barrier=%u"},
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_i_version, "i_version"},
	{Opt_dax, "dax"},
	{Opt_stripe, "stripe=%u"},
	{Opt_delalloc, "delalloc"},
	{Opt_lazytime, "lazytime"},
	{Opt_nolazytime, "nolazytime"},
	{Opt_nodelalloc, "nodelalloc"},
	{Opt_removed, "mblk_io_submit"},
	{Opt_removed, "nomblk_io_submit"},
	{Opt_block_validity, "block_validity"},
	{Opt_noblock_validity, "noblock_validity"},
	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
	{Opt_journal_ioprio, "journal_ioprio=%u"},
	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
	{Opt_auto_da_alloc, "auto_da_alloc"},
	{Opt_noauto_da_alloc, "noauto_da_alloc"},
	{Opt_dioread_nolock, "dioread_nolock"},
	{Opt_dioread_lock, "dioread_lock"},
	{Opt_discard, "discard"},
	{Opt_nodiscard, "nodiscard"},
	{Opt_init_itable, "init_itable=%u"},
	{Opt_init_itable, "init_itable"},
	{Opt_noinit_itable, "noinit_itable"},
	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
	{Opt_test_dummy_encryption, "test_dummy_encryption"},
	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
	{Opt_removed, "noreservation"},	/* mount option from ext2/3 */
	{Opt_removed, "journal=%u"},	/* mount option from ext2/3 */
	{Opt_err, NULL},
};

static ext4_fsblk_t get_sb_block(void **data)
{
	ext4_fsblk_t	sb_block;
	char		*options = (char *) *data;

	if (!options || strncmp(options, "sb=", 3) != 0)
		return 1;	/* Default location */

	options += 3;
	/* TODO: use simple_strtoll with >32bit ext4 */
	sb_block = simple_strtoul(options, &options, 0);
	if (*options && *options != ',') {
		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
		       (char *) *data);
		return 1;
	}
	if (*options == ',')
		options++;
	*data = (void *) options;

	return sb_block;
}

#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";

#ifdef CONFIG_QUOTA
static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qname;
	int ret = -1;

	if (sb_any_quota_loaded(sb) &&
		!sbi->s_qf_names[qtype]) {
		ext4_msg(sb, KERN_ERR,
			"Cannot change journaled "
			"quota options when quota turned on");
		return -1;
	}
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
		ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
			 "when QUOTA feature is enabled");
		return -1;
	}
	qname = match_strdup(args);
	if (!qname) {
		ext4_msg(sb, KERN_ERR,
			"Not enough memory for storing quotafile name");
		return -1;
	}
	if (sbi->s_qf_names[qtype]) {
		if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
			ret = 1;
		else
			ext4_msg(sb, KERN_ERR,
				 "%s quota file already specified",
				 QTYPE2NAME(qtype));
		goto errout;
	}
	if (strchr(qname, '/')) {
		ext4_msg(sb, KERN_ERR,
			"quotafile must be on filesystem root");
		goto errout;
	}
	sbi->s_qf_names[qtype] = qname;
	set_opt(sb, QUOTA);
	return 1;
errout:
	kfree(qname);
	return ret;
}

static int clear_qf_name(struct super_block *sb, int qtype)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sb_any_quota_loaded(sb) &&
		sbi->s_qf_names[qtype]) {
		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
			" when quota turned on");
		return -1;
	}
	kfree(sbi->s_qf_names[qtype]);
	sbi->s_qf_names[qtype] = NULL;
	return 1;
}
#endif

#define MOPT_SET	0x0001
#define MOPT_CLEAR	0x0002
#define MOPT_NOSUPPORT	0x0004
#define MOPT_EXPLICIT	0x0008
#define MOPT_CLEAR_ERR	0x0010
#define MOPT_GTE0	0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	0x0040
#else
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ	0x0080
#define MOPT_NO_EXT2	0x0100
#define MOPT_NO_EXT3	0x0200
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING	0x0400

static const struct mount_opts {
	int	token;
	int	mount_opt;
	int	flags;
} ext4_mount_opts[] = {
	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
				    EXT4_MOUNT_JOURNAL_CHECKSUM),
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
	 MOPT_NO_EXT2 | MOPT_SET},
	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
	 MOPT_NO_EXT2 | MOPT_CLEAR},
	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
	{Opt_commit, 0, MOPT_GTE0},
	{Opt_max_batch_time, 0, MOPT_GTE0},
	{Opt_min_batch_time, 0, MOPT_GTE0},
	{Opt_inode_readahead_blks, 0, MOPT_GTE0},
	{Opt_init_itable, 0, MOPT_GTE0},
	{Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
	{Opt_stripe, 0, MOPT_GTE0},
	{Opt_resuid, 0, MOPT_GTE0},
	{Opt_resgid, 0, MOPT_GTE0},
	{Opt_journal_dev, 0, MOPT_GTE0},
	{Opt_journal_path, 0, MOPT_STRING},
	{Opt_journal_ioprio, 0, MOPT_GTE0},
	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
	 MOPT_NO_EXT2 | MOPT_DATAJ},
	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
	{Opt_acl, 0, MOPT_NOSUPPORT},
	{Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
	{Opt_usrjquota, 0, MOPT_Q},
	{Opt_grpjquota, 0, MOPT_Q},
	{Opt_offusrjquota, 0, MOPT_Q},
	{Opt_offgrpjquota, 0, MOPT_Q},
	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
	{Opt_test_dummy_encryption, 0, MOPT_GTE0},
	{Opt_err, 0, 0}
};

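/*
 * Handle one parsed mount option.  The MOPT_* flags in the table above
 * drive the generic cases: MOPT_SET/MOPT_CLEAR toggle a bit in
 * s_mount_opt, MOPT_NOSUPPORT rejects the option, MOPT_GTE0 demands a
 * non-negative argument, MOPT_STRING takes a string argument,
 * MOPT_Q/MOPT_QFMT gate quota options on CONFIG_QUOTA, MOPT_DATAJ
 * selects a data=* journalling mode, and MOPT_NO_EXT2/MOPT_NO_EXT3
 * refuse the option for the compat ext2/ext3 personalities.  Returns 1
 * on success and -1 on error; *journal_devnum and *journal_ioprio are
 * out-parameters for the caller.
 */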
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
			    substring_t *args, unsigned long *journal_devnum,
			    unsigned int *journal_ioprio, int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	const struct mount_opts *m;
	kuid_t uid;
	kgid_t gid;
	int arg = 0;

#ifdef CONFIG_QUOTA
	if (token == Opt_usrjquota)
		return set_qf_name(sb, USRQUOTA, &args[0]);
	else if (token == Opt_grpjquota)
		return set_qf_name(sb, GRPQUOTA, &args[0]);
	else if (token == Opt_offusrjquota)
		return clear_qf_name(sb, USRQUOTA);
	else if (token == Opt_offgrpjquota)
		return clear_qf_name(sb, GRPQUOTA);
#endif
	switch (token) {
	case Opt_noacl:
	case Opt_nouser_xattr:
		ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
		break;
	case Opt_sb:
		return 1;	/* handled by get_sb_block() */
	case Opt_removed:
		ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
		return 1;
	case Opt_abort:
		sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
		return 1;
	case Opt_i_version:
		sb->s_flags |= MS_I_VERSION;
		return 1;
	case Opt_lazytime:
		sb->s_flags |= MS_LAZYTIME;
		return 1;
	case Opt_nolazytime:
		sb->s_flags &= ~MS_LAZYTIME;
		return 1;
	}

	for (m = ext4_mount_opts; m->token != Opt_err; m++)
		if (token == m->token)
			break;

	if (m->token == Opt_err) {
		ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
			 "or missing value", opt);
		return -1;
	}

	if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Mount option \"%s\" incompatible with ext2", opt);
		return -1;
	}
	if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Mount option \"%s\" incompatible with ext3", opt);
		return -1;
	}

	if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
		return -1;
	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
		return -1;
	if (m->flags & MOPT_EXPLICIT)
		set_opt2(sb, EXPLICIT_DELALLOC);
	if (m->flags & MOPT_CLEAR_ERR)
		clear_opt(sb, ERRORS_MASK);
	if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
		ext4_msg(sb, KERN_ERR, "Cannot change quota "
			 "options when quota turned on");
		return -1;
	}

	if (m->flags & MOPT_NOSUPPORT) {
		ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
	} else if (token == Opt_commit) {
		if (arg == 0)
			arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
		sbi->s_commit_interval = HZ * arg;
	} else if (token == Opt_max_batch_time) {
		sbi->s_max_batch_time = arg;
	} else if (token == Opt_min_batch_time) {
		sbi->s_min_batch_time = arg;
	} else if (token == Opt_inode_readahead_blks) {
		if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
			ext4_msg(sb, KERN_ERR,
				 "EXT4-fs: inode_readahead_blks must be "
				 "0 or a power of 2 smaller than 2^31");
			return -1;
		}
		sbi->s_inode_readahead_blks = arg;
	} else if (token == Opt_init_itable) {
		set_opt(sb, INIT_INODE_TABLE);
		if (!args->from)
			arg = EXT4_DEF_LI_WAIT_MULT;
		sbi->s_li_wait_mult = arg;
	} else if (token == Opt_max_dir_size_kb) {
		sbi->s_max_dir_size_kb = arg;
	} else if (token == Opt_stripe) {
		sbi->s_stripe = arg;
	} else if (token == Opt_resuid) {
		uid = make_kuid(current_user_ns(), arg);
		if (!uid_valid(uid)) {
			ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
			return -1;
		}
		sbi->s_resuid = uid;
	} else if (token == Opt_resgid) {
		gid = make_kgid(current_user_ns(), arg);
		if (!gid_valid(gid)) {
			ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
			return -1;
		}
		sbi->s_resgid = gid;
	} else if (token == Opt_journal_dev) {
		if (is_remount) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot specify journal on remount");
			return -1;
		}
		*journal_devnum = arg;
	} else if (token == Opt_journal_path) {
		char *journal_path;
		struct inode *journal_inode;
		struct path path;
		int error;

		if (is_remount) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot specify journal on remount");
			return -1;
		}
		journal_path = match_strdup(&args[0]);
		if (!journal_path) {
			ext4_msg(sb, KERN_ERR, "error: could not dup "
				"journal device string");
			return -1;
		}

		error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
		if (error) {
			ext4_msg(sb, KERN_ERR, "error: could not find "
				"journal device path: error %d", error);
			kfree(journal_path);
			return -1;
		}

		journal_inode = d_inode(path.dentry);
		if (!S_ISBLK(journal_inode->i_mode)) {
			ext4_msg(sb, KERN_ERR, "error: journal path %s "
				"is not a block device", journal_path);
			path_put(&path);
			kfree(journal_path);
			return -1;
		}

		*journal_devnum = new_encode_dev(journal_inode->i_rdev);
		path_put(&path);
		kfree(journal_path);
	} else if (token == Opt_journal_ioprio) {
		if (arg > 7) {
			ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
				 " (must be 0-7)");
			return -1;
		}
		*journal_ioprio =
			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
	} else if (token == Opt_test_dummy_encryption) {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
		sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
		ext4_msg(sb, KERN_WARNING,
			 "Test dummy encryption mode enabled");
#else
		ext4_msg(sb, KERN_WARNING,
			 "Test dummy encryption mount option ignored");
#endif
	} else if (m->flags & MOPT_DATAJ) {
		if (is_remount) {
			if (!sbi->s_journal)
				ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
			else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
				ext4_msg(sb, KERN_ERR,
					 "Cannot change data mode on remount");
				return -1;
			}
		} else {
			clear_opt(sb, DATA_FLAGS);
			sbi->s_mount_opt |= m->mount_opt;
		}
#ifdef CONFIG_QUOTA
	} else if (m->flags & MOPT_QFMT) {
		if (sb_any_quota_loaded(sb) &&
		    sbi->s_jquota_fmt != m->mount_opt) {
			ext4_msg(sb, KERN_ERR, "Cannot change journaled "
				 "quota options when quota turned on");
			return -1;
		}
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       EXT4_FEATURE_RO_COMPAT_QUOTA)) {
			ext4_msg(sb, KERN_ERR,
				 "Cannot set journaled quota options "
				 "when QUOTA feature is enabled");
			return -1;
		}
		sbi->s_jquota_fmt = m->mount_opt;
#endif
#ifndef CONFIG_FS_DAX
	} else if (token == Opt_dax) {
		ext4_msg(sb, KERN_INFO, "dax option not supported");
		return -1;
#endif
	} else {
		if (!args->from)
			arg = 1;
		if (m->flags & MOPT_CLEAR)
			arg = !arg;
		else if (unlikely(!(m->flags & MOPT_SET))) {
			ext4_msg(sb, KERN_WARNING,
				 "buggy handling of option %s", opt);
			WARN_ON(1);
			return -1;
		}
		if (arg != 0)
			sbi->s_mount_opt |= m->mount_opt;
		else
			sbi->s_mount_opt &= ~m->mount_opt;
	}
	return 1;
}

static int parse_options(char *options, struct super_block *sb,
			 unsigned long *journal_devnum,
			 unsigned int *journal_ioprio,
			 int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int token;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		/*
		 * Initialize args struct so we know whether arg was
		 * found; some options take optional arguments.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, tokens, args);
		if (handle_mount_opt(sb, p, token, args, journal_devnum,
				     journal_ioprio, is_remount) < 0)
			return 0;
	}
#ifdef CONFIG_QUOTA
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
	    (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
		ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
			 "feature is enabled");
		return 0;
	}
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
			clear_opt(sb, USRQUOTA);

		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
			clear_opt(sb, GRPQUOTA);

		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
			ext4_msg(sb, KERN_ERR, "old and new quota "
					"format mixing");
			return 0;
		}

		if (!sbi->s_jquota_fmt) {
			ext4_msg(sb, KERN_ERR, "journaled quota format "
					"not specified");
			return 0;
		}
	}
#endif
	if (test_opt(sb, DIOREAD_NOLOCK)) {
		int blocksize =
			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);

		if (blocksize < PAGE_CACHE_SIZE) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "dioread_nolock if block size != PAGE_SIZE");
			return 0;
		}
	}
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
			 "in data=ordered mode");
		return 0;
	}
	return 1;
}

static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
#endif
}

static const char *token2str(int token)
{
	const struct match_token *t;

	for (t = tokens; t->token != Opt_err; t++)
		if (t->token == token && !strchr(t->pattern, '='))
			break;
	return t->pattern;
}

/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
			      int nodefs)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
	const struct mount_opts *m;
	char sep = nodefs ? '\n' : ',';

#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)

	if (sbi->s_sb_block != 1)
		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);

	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
		int want_set = m->flags & MOPT_SET;
		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
		    (m->flags & MOPT_CLEAR_ERR))
			continue;
		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
			continue; /* skip if same as the default */
		if ((want_set &&
		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
		    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
			continue; /* select Opt_noFoo vs Opt_Foo */
		SEQ_OPTS_PRINT("%s", token2str(m->token));
	}

	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
		SEQ_OPTS_PRINT("resuid=%u",
				from_kuid_munged(&init_user_ns, sbi->s_resuid));
	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
		SEQ_OPTS_PRINT("resgid=%u",
				from_kgid_munged(&init_user_ns, sbi->s_resgid));
	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
		SEQ_OPTS_PUTS("errors=remount-ro");
	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
		SEQ_OPTS_PUTS("errors=continue");
	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
		SEQ_OPTS_PUTS("errors=panic");
	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
	if (sb->s_flags & MS_I_VERSION)
		SEQ_OPTS_PUTS("i_version");
	if (nodefs || sbi->s_stripe)
		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			SEQ_OPTS_PUTS("data=journal");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			SEQ_OPTS_PUTS("data=ordered");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
			SEQ_OPTS_PUTS("data=writeback");
	}
	if (nodefs ||
	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
			       sbi->s_inode_readahead_blks);

	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
	if (nodefs || sbi->s_max_dir_size_kb)
		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);

	ext4_show_quota_options(seq, sb);
	return 0;
}

static int ext4_show_options(struct seq_file *seq, struct dentry *root)
{
	return _ext4_show_options(seq, root->d_sb, 0);
}

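/*
 * Back-end for the per-filesystem "options" procfs entry (registered
 * under ext4_proc_root, i.e. /proc/fs/ext4/<dev>/options): unlike the
 * mount-time show_options hook above, this dumps the full option state
 * (nodefs == 1), one option per line, prefixed with "ro" or "rw".
 */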
static int options_seq_show(struct seq_file *seq, void *offset) 1843 { 1844 struct super_block *sb = seq->private; 1845 int rc; 1846 1847 seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); 1848 rc = _ext4_show_options(seq, sb, 1); 1849 seq_puts(seq, "\n"); 1850 return rc; 1851 } 1852 1853 static int options_open_fs(struct inode *inode, struct file *file) 1854 { 1855 return single_open(file, options_seq_show, PDE_DATA(inode)); 1856 } 1857 1858 static const struct file_operations ext4_seq_options_fops = { 1859 .owner = THIS_MODULE, 1860 .open = options_open_fs, 1861 .read = seq_read, 1862 .llseek = seq_lseek, 1863 .release = single_release, 1864 }; 1865 1866 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 1867 int read_only) 1868 { 1869 struct ext4_sb_info *sbi = EXT4_SB(sb); 1870 int res = 0; 1871 1872 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1873 ext4_msg(sb, KERN_ERR, "revision level too high, " 1874 "forcing read-only mode"); 1875 res = MS_RDONLY; 1876 } 1877 if (read_only) 1878 goto done; 1879 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1880 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1881 "running e2fsck is recommended"); 1882 else if (sbi->s_mount_state & EXT4_ERROR_FS) 1883 ext4_msg(sb, KERN_WARNING, 1884 "warning: mounting fs with errors, " 1885 "running e2fsck is recommended"); 1886 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && 1887 le16_to_cpu(es->s_mnt_count) >= 1888 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1889 ext4_msg(sb, KERN_WARNING, 1890 "warning: maximal mount count reached, " 1891 "running e2fsck is recommended"); 1892 else if (le32_to_cpu(es->s_checkinterval) && 1893 (le32_to_cpu(es->s_lastcheck) + 1894 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1895 ext4_msg(sb, KERN_WARNING, 1896 "warning: checktime reached, " 1897 "running e2fsck is recommended"); 1898 if (!sbi->s_journal) 1899 es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1900 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1901 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1902 le16_add_cpu(&es->s_mnt_count, 1); 1903 es->s_mtime = cpu_to_le32(get_seconds()); 1904 ext4_update_dynamic_rev(sb); 1905 if (sbi->s_journal) 1906 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1907 1908 ext4_commit_super(sb, 1); 1909 done: 1910 if (test_opt(sb, DEBUG)) 1911 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1912 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1913 sb->s_blocksize, 1914 sbi->s_groups_count, 1915 EXT4_BLOCKS_PER_GROUP(sb), 1916 EXT4_INODES_PER_GROUP(sb), 1917 sbi->s_mount_opt, sbi->s_mount_opt2); 1918 1919 cleancache_init_fs(sb); 1920 return res; 1921 } 1922 1923 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) 1924 { 1925 struct ext4_sb_info *sbi = EXT4_SB(sb); 1926 struct flex_groups *new_groups; 1927 int size; 1928 1929 if (!sbi->s_log_groups_per_flex) 1930 return 0; 1931 1932 size = ext4_flex_group(sbi, ngroup - 1) + 1; 1933 if (size <= sbi->s_flex_groups_allocated) 1934 return 0; 1935 1936 size = roundup_pow_of_two(size * sizeof(struct flex_groups)); 1937 new_groups = ext4_kvzalloc(size, GFP_KERNEL); 1938 if (!new_groups) { 1939 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", 1940 size / (int) sizeof(struct flex_groups)); 1941 return -ENOMEM; 1942 } 1943 1944 if (sbi->s_flex_groups) { 1945 memcpy(new_groups, sbi->s_flex_groups, 1946 (sbi->s_flex_groups_allocated * 1947 sizeof(struct flex_groups))); 1948 kvfree(sbi->s_flex_groups); 1949 } 1950 
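	/* Publish the resized, zero-filled array and record its capacity. */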
sbi->s_flex_groups = new_groups; 1951 sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); 1952 return 0; 1953 } 1954 1955 static int ext4_fill_flex_info(struct super_block *sb) 1956 { 1957 struct ext4_sb_info *sbi = EXT4_SB(sb); 1958 struct ext4_group_desc *gdp = NULL; 1959 ext4_group_t flex_group; 1960 int i, err; 1961 1962 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1963 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { 1964 sbi->s_log_groups_per_flex = 0; 1965 return 1; 1966 } 1967 1968 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); 1969 if (err) 1970 goto failed; 1971 1972 for (i = 0; i < sbi->s_groups_count; i++) { 1973 gdp = ext4_get_group_desc(sb, i, NULL); 1974 1975 flex_group = ext4_flex_group(sbi, i); 1976 atomic_add(ext4_free_inodes_count(sb, gdp), 1977 &sbi->s_flex_groups[flex_group].free_inodes); 1978 atomic64_add(ext4_free_group_clusters(sb, gdp), 1979 &sbi->s_flex_groups[flex_group].free_clusters); 1980 atomic_add(ext4_used_dirs_count(sb, gdp), 1981 &sbi->s_flex_groups[flex_group].used_dirs); 1982 } 1983 1984 return 1; 1985 failed: 1986 return 0; 1987 } 1988 1989 static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1990 struct ext4_group_desc *gdp) 1991 { 1992 int offset; 1993 __u16 crc = 0; 1994 __le32 le_group = cpu_to_le32(block_group); 1995 1996 if (ext4_has_metadata_csum(sbi->s_sb)) { 1997 /* Use new metadata_csum algorithm */ 1998 __le16 save_csum; 1999 __u32 csum32; 2000 2001 save_csum = gdp->bg_checksum; 2002 gdp->bg_checksum = 0; 2003 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, 2004 sizeof(le_group)); 2005 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, 2006 sbi->s_desc_size); 2007 gdp->bg_checksum = save_csum; 2008 2009 crc = csum32 & 0xFFFF; 2010 goto out; 2011 } 2012 2013 /* old crc16 code */ 2014 if (!(sbi->s_es->s_feature_ro_compat & 2015 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) 2016 return 0; 2017 2018 offset = offsetof(struct ext4_group_desc, bg_checksum); 2019 2020 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 2021 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 2022 crc = crc16(crc, (__u8 *)gdp, offset); 2023 offset += sizeof(gdp->bg_checksum); /* skip checksum */ 2024 /* for checksum of struct ext4_group_desc do the rest...*/ 2025 if ((sbi->s_es->s_feature_incompat & 2026 cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 2027 offset < le16_to_cpu(sbi->s_es->s_desc_size)) 2028 crc = crc16(crc, (__u8 *)gdp + offset, 2029 le16_to_cpu(sbi->s_es->s_desc_size) - 2030 offset); 2031 2032 out: 2033 return cpu_to_le16(crc); 2034 } 2035 2036 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, 2037 struct ext4_group_desc *gdp) 2038 { 2039 if (ext4_has_group_desc_csum(sb) && 2040 (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), 2041 block_group, gdp))) 2042 return 0; 2043 2044 return 1; 2045 } 2046 2047 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, 2048 struct ext4_group_desc *gdp) 2049 { 2050 if (!ext4_has_group_desc_csum(sb)) 2051 return; 2052 gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); 2053 } 2054 2055 /* Called at mount-time, super-block is locked */ 2056 static int ext4_check_descriptors(struct super_block *sb, 2057 ext4_group_t *first_not_zeroed) 2058 { 2059 struct ext4_sb_info *sbi = EXT4_SB(sb); 2060 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); 2061 ext4_fsblk_t last_block; 2062 ext4_fsblk_t block_bitmap; 2063 
ext4_fsblk_t inode_bitmap; 2064 ext4_fsblk_t inode_table; 2065 int flexbg_flag = 0; 2066 ext4_group_t i, grp = sbi->s_groups_count; 2067 2068 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 2069 flexbg_flag = 1; 2070 2071 ext4_debug("Checking group descriptors"); 2072 2073 for (i = 0; i < sbi->s_groups_count; i++) { 2074 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 2075 2076 if (i == sbi->s_groups_count - 1 || flexbg_flag) 2077 last_block = ext4_blocks_count(sbi->s_es) - 1; 2078 else 2079 last_block = first_block + 2080 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 2081 2082 if ((grp == sbi->s_groups_count) && 2083 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 2084 grp = i; 2085 2086 block_bitmap = ext4_block_bitmap(sb, gdp); 2087 if (block_bitmap < first_block || block_bitmap > last_block) { 2088 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2089 "Block bitmap for group %u not in group " 2090 "(block %llu)!", i, block_bitmap); 2091 return 0; 2092 } 2093 inode_bitmap = ext4_inode_bitmap(sb, gdp); 2094 if (inode_bitmap < first_block || inode_bitmap > last_block) { 2095 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2096 "Inode bitmap for group %u not in group " 2097 "(block %llu)!", i, inode_bitmap); 2098 return 0; 2099 } 2100 inode_table = ext4_inode_table(sb, gdp); 2101 if (inode_table < first_block || 2102 inode_table + sbi->s_itb_per_group - 1 > last_block) { 2103 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2104 "Inode table for group %u not in group " 2105 "(block %llu)!", i, inode_table); 2106 return 0; 2107 } 2108 ext4_lock_group(sb, i); 2109 if (!ext4_group_desc_csum_verify(sb, i, gdp)) { 2110 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2111 "Checksum for group %u failed (%u!=%u)", 2112 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 2113 gdp)), le16_to_cpu(gdp->bg_checksum)); 2114 if (!(sb->s_flags & MS_RDONLY)) { 2115 ext4_unlock_group(sb, i); 2116 return 0; 2117 } 2118 } 2119 ext4_unlock_group(sb, i); 2120 if (!flexbg_flag) 2121 first_block += EXT4_BLOCKS_PER_GROUP(sb); 2122 } 2123 if (NULL != first_not_zeroed) 2124 *first_not_zeroed = grp; 2125 return 1; 2126 } 2127 2128 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at 2129 * the superblock) which were deleted from all directories, but held open by 2130 * a process at the time of a crash. We walk the list and try to delete these 2131 * inodes at recovery time (only with a read-write filesystem). 2132 * 2133 * In order to keep the orphan inode chain consistent during traversal (in 2134 * case of crash during recovery), we link each inode into the superblock 2135 * orphan list_head and handle it the same way as an inode deletion during 2136 * normal operation (which journals the operations for us). 2137 * 2138 * We only do an iget() and an iput() on each inode, which is very safe if we 2139 * accidentally point at an in-use or already deleted inode. The worst that 2140 * can happen in this case is that we get a "bit already cleared" message from 2141 * ext4_free_inode(). The only reason we would point at a wrong inode is if 2142 * e2fsck was run on this filesystem, and it must have already done the orphan 2143 * inode cleanup for us, so we can safely abort without any further action. 
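 *
 * For example: if an inode was unlinked while still held open and the
 * system crashed, s_last_orphan leads to that inode, and the
 * iget()/iput() pair below completes the deferred deletion (or the
 * truncate, when i_nlink is still non-zero).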
2144 */ 2145 static void ext4_orphan_cleanup(struct super_block *sb, 2146 struct ext4_super_block *es) 2147 { 2148 unsigned int s_flags = sb->s_flags; 2149 int nr_orphans = 0, nr_truncates = 0; 2150 #ifdef CONFIG_QUOTA 2151 int i; 2152 #endif 2153 if (!es->s_last_orphan) { 2154 jbd_debug(4, "no orphan inodes to clean up\n"); 2155 return; 2156 } 2157 2158 if (bdev_read_only(sb->s_bdev)) { 2159 ext4_msg(sb, KERN_ERR, "write access " 2160 "unavailable, skipping orphan cleanup"); 2161 return; 2162 } 2163 2164 /* Check if feature set would not allow a r/w mount */ 2165 if (!ext4_feature_set_ok(sb, 0)) { 2166 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 2167 "unknown ROCOMPAT features"); 2168 return; 2169 } 2170 2171 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2172 /* don't clear list on RO mount w/ errors */ 2173 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 2174 ext4_msg(sb, KERN_INFO, "Errors on filesystem, " 2175 "clearing orphan list.\n"); 2176 es->s_last_orphan = 0; 2177 } 2178 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2179 return; 2180 } 2181 2182 if (s_flags & MS_RDONLY) { 2183 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 2184 sb->s_flags &= ~MS_RDONLY; 2185 } 2186 #ifdef CONFIG_QUOTA 2187 /* Needed for iput() to work correctly and not trash data */ 2188 sb->s_flags |= MS_ACTIVE; 2189 /* Turn on quotas so that they are updated correctly */ 2190 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 2191 if (EXT4_SB(sb)->s_qf_names[i]) { 2192 int ret = ext4_quota_on_mount(sb, i); 2193 if (ret < 0) 2194 ext4_msg(sb, KERN_ERR, 2195 "Cannot turn on journaled " 2196 "quota: error %d", ret); 2197 } 2198 } 2199 #endif 2200 2201 while (es->s_last_orphan) { 2202 struct inode *inode; 2203 2204 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 2205 if (IS_ERR(inode)) { 2206 es->s_last_orphan = 0; 2207 break; 2208 } 2209 2210 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2211 dquot_initialize(inode); 2212 if (inode->i_nlink) { 2213 if (test_opt(sb, DEBUG)) 2214 ext4_msg(sb, KERN_DEBUG, 2215 "%s: truncating inode %lu to %lld bytes", 2216 __func__, inode->i_ino, inode->i_size); 2217 jbd_debug(2, "truncating inode %lu to %lld bytes\n", 2218 inode->i_ino, inode->i_size); 2219 mutex_lock(&inode->i_mutex); 2220 truncate_inode_pages(inode->i_mapping, inode->i_size); 2221 ext4_truncate(inode); 2222 mutex_unlock(&inode->i_mutex); 2223 nr_truncates++; 2224 } else { 2225 if (test_opt(sb, DEBUG)) 2226 ext4_msg(sb, KERN_DEBUG, 2227 "%s: deleting unreferenced inode %lu", 2228 __func__, inode->i_ino); 2229 jbd_debug(2, "deleting unreferenced inode %lu\n", 2230 inode->i_ino); 2231 nr_orphans++; 2232 } 2233 iput(inode); /* The delete magic happens here! */ 2234 } 2235 2236 #define PLURAL(x) (x), ((x) == 1) ? "" : "s" 2237 2238 if (nr_orphans) 2239 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 2240 PLURAL(nr_orphans)); 2241 if (nr_truncates) 2242 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 2243 PLURAL(nr_truncates)); 2244 #ifdef CONFIG_QUOTA 2245 /* Turn quotas off */ 2246 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 2247 if (sb_dqopt(sb)->files[i]) 2248 dquot_quota_off(sb, i); 2249 } 2250 #endif 2251 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 2252 } 2253 2254 /* 2255 * Maximal extent format file size. 2256 * Resulting logical blkno at s_maxbytes must fit in our on-disk 2257 * extent format containers, within a sector_t, and within i_blocks 2258 * in the vfs. 
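 * (With 4 KiB blocks, for instance, the extent container limit computed
 * below works out to roughly (2^32 - 1) blocks * 4 KiB ~= 16 TiB.)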
 * ext4 inode has 48 bits of i_block in fsblock units,
 * so that won't be a limiting factor.
 *
 * However, there is another limiting factor: we store extents as a
 * starting block plus a length, so the length of the extent covering
 * the maximum file size must also fit into the on-disk format
 * containers. Since the stored length is always one unit bigger than
 * the largest offset (we count 0 as well), we have to lower s_maxbytes
 * by one fs block.
 *
 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
 */
static loff_t ext4_max_size(int blkbits, int has_huge_files)
{
	loff_t res;
	loff_t upper_limit = MAX_LFS_FILESIZE;

	/* small i_blocks in vfs inode? */
	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
		/*
		 * CONFIG_LBDAF not enabled implies that the inode
		 * i_block represents total blocks in 512-byte units;
		 * 32 == size of vfs inode i_blocks * 8
		 */
		upper_limit = (1LL << 32) - 1;

		/* total blocks in file system block size */
		upper_limit >>= (blkbits - 9);
		upper_limit <<= blkbits;
	}

	/*
	 * 32-bit extent-start container, ee_block. We lower the maxbytes
	 * by one fs block, so ee_len can cover the extent of maximum file
	 * size
	 */
	res = (1LL << 32) - 1;
	res <<= blkbits;

	/* Sanity check against vm- & vfs- imposed limits */
	if (res > upper_limit)
		res = upper_limit;

	return res;
}

/*
 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
 * We need to be 1 filesystem block less than the 2^48 sector limit.
 */
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
	loff_t res = EXT4_NDIR_BLOCKS;
	int meta_blocks;
	loff_t upper_limit;
	/* This is calculated to be the largest file size for a dense, block
	 * mapped file such that the file's total number of 512-byte sectors,
	 * including data and all indirect blocks, does not exceed (2^48 - 1).
	 *
	 * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
	 * number of 512-byte sectors of the file.
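	 *
	 * As a rough worked example with 4 KiB blocks (bits == 12): the
	 * direct/indirect tree below allows about 12 + 2^10 + 2^20 + 2^30
	 * blocks ~= 4 TiB, while the 2^48-sector i_blocks limit (the
	 * huge_file case) is far larger, so the block-map geometry is
	 * what caps the file size here.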
	 */

	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
		/*
		 * !has_huge_files or CONFIG_LBDAF not enabled implies that
		 * the inode i_block field represents total file blocks in
		 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
		 */
		upper_limit = (1LL << 32) - 1;

		/* total blocks in file system block size */
		upper_limit >>= (bits - 9);

	} else {
		/*
		 * We use 48 bit ext4_inode i_blocks
		 * With EXT4_HUGE_FILE_FL set the i_blocks
		 * represent total number of blocks in
		 * file system block size
		 */
		upper_limit = (1LL << 48) - 1;

	}

	/* indirect blocks */
	meta_blocks = 1;
	/* double indirect blocks */
	meta_blocks += 1 + (1LL << (bits-2));
	/* triple indirect blocks */
	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));

	upper_limit -= meta_blocks;
	upper_limit <<= bits;

	res += 1LL << (bits-2);
	res += 1LL << (2*(bits-2));
	res += 1LL << (3*(bits-2));
	res <<= bits;
	if (res > upper_limit)
		res = upper_limit;

	if (res > MAX_LFS_FILESIZE)
		res = MAX_LFS_FILESIZE;

	return res;
}

static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				   ext4_fsblk_t logical_sb_block, int nr)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t bg, first_meta_bg;
	int has_super = 0;

	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
	    nr < first_meta_bg)
		return logical_sb_block + nr + 1;
	bg = sbi->s_desc_per_block * nr;
	if (ext4_bg_has_super(sb, bg))
		has_super = 1;

	/*
	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
	 * compensate.
	 */
	if (sb->s_blocksize == 1024 && nr == 0 &&
	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
		has_super++;

	return (has_super + ext4_group_first_block_no(sb, bg));
}

/**
 * ext4_get_stripe_size: Get the stripe size.
 * @sbi: In memory super block info
 *
 * If a stripe size was specified via a mount option and it is no larger
 * than the blocks per group, use it. Otherwise fall back to the
 * superblock stripe width, and then to the RAID stride, applying the
 * same blocks-per-group bound; if nothing qualifies, return 0. The
 * allocator needs the stripe size to be less than blocks per group.
 *
 */
static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
{
	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
	unsigned long stripe_width =
			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
	int ret;

	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
		ret = sbi->s_stripe;
	else if (stripe_width <= sbi->s_blocks_per_group)
		ret = stripe_width;
	else if (stride <= sbi->s_blocks_per_group)
		ret = stride;
	else
		ret = 0;

	/*
	 * If the stripe width is 1, this makes no sense and
	 * we set it to 0 to turn off stripe handling code.
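	 * For example, mounting with -o stripe=8192 on a filesystem with
	 * 32768 blocks per group uses 8192, while a value larger than the
	 * blocks per group falls back to the superblock stripe width or
	 * stride, per the cascade above.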
	 */
	if (ret <= 1)
		ret = 0;

	return ret;
}

/* sysfs support */

struct ext4_attr {
	struct attribute attr;
	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
			 const char *, size_t);
	union {
		int offset;
		int deprecated_val;
	} u;
};

static int parse_strtoull(const char *buf,
		unsigned long long max, unsigned long long *value)
{
	int ret;

	ret = kstrtoull(skip_spaces(buf), 0, value);
	if (!ret && *value > max)
		ret = -EINVAL;
	return ret;
}

static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
					      struct ext4_sb_info *sbi,
					      char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%llu\n",
		(s64) EXT4_C2B(sbi,
			percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
}

static ssize_t session_write_kbytes_show(struct ext4_attr *a,
					 struct ext4_sb_info *sbi, char *buf)
{
	struct super_block *sb = sbi->s_buddy_cache->i_sb;

	if (!sb->s_bdev->bd_part)
		return snprintf(buf, PAGE_SIZE, "0\n");
	return snprintf(buf, PAGE_SIZE, "%lu\n",
			(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
			 sbi->s_sectors_written_start) >> 1);
}

static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
					  struct ext4_sb_info *sbi, char *buf)
{
	struct super_block *sb = sbi->s_buddy_cache->i_sb;

	if (!sb->s_bdev->bd_part)
		return snprintf(buf, PAGE_SIZE, "0\n");
	return snprintf(buf, PAGE_SIZE, "%llu\n",
			(unsigned long long)(sbi->s_kbytes_written +
			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
}

static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
					  struct ext4_sb_info *sbi,
					  const char *buf, size_t count)
{
	unsigned long t;
	int ret;

	ret = kstrtoul(skip_spaces(buf), 0, &t);
	if (ret)
		return ret;

	if (t && (!is_power_of_2(t) || t > 0x40000000))
		return -EINVAL;

	sbi->s_inode_readahead_blks = t;
	return count;
}

static ssize_t sbi_ui_show(struct ext4_attr *a,
			   struct ext4_sb_info *sbi, char *buf)
{
	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);

	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}

static ssize_t sbi_ui_store(struct ext4_attr *a,
			    struct ext4_sb_info *sbi,
			    const char *buf, size_t count)
{
	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
	unsigned long t;
	int ret;

	ret = kstrtoul(skip_spaces(buf), 0, &t);
	if (ret)
		return ret;
	*ui = t;
	return count;
}

static ssize_t es_ui_show(struct ext4_attr *a,
			  struct ext4_sb_info *sbi, char *buf)
{

	unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
			   a->u.offset);

	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}

static ssize_t reserved_clusters_show(struct ext4_attr *a,
				      struct ext4_sb_info *sbi, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%llu\n",
		(unsigned long long) atomic64_read(&sbi->s_resv_clusters));
}

static ssize_t reserved_clusters_store(struct ext4_attr *a,
				       struct ext4_sb_info *sbi,
				       const char *buf, size_t count)
{
	unsigned long long val;
	int ret;

	if (parse_strtoull(buf, -1ULL, &val))
2556 return -EINVAL; 2557 ret = ext4_reserve_clusters(sbi, val); 2558 2559 return ret ? ret : count; 2560 } 2561 2562 static ssize_t trigger_test_error(struct ext4_attr *a, 2563 struct ext4_sb_info *sbi, 2564 const char *buf, size_t count) 2565 { 2566 int len = count; 2567 2568 if (!capable(CAP_SYS_ADMIN)) 2569 return -EPERM; 2570 2571 if (len && buf[len-1] == '\n') 2572 len--; 2573 2574 if (len) 2575 ext4_error(sbi->s_sb, "%.*s", len, buf); 2576 return count; 2577 } 2578 2579 static ssize_t sbi_deprecated_show(struct ext4_attr *a, 2580 struct ext4_sb_info *sbi, char *buf) 2581 { 2582 return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); 2583 } 2584 2585 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2586 static struct ext4_attr ext4_attr_##_name = { \ 2587 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2588 .show = _show, \ 2589 .store = _store, \ 2590 .u = { \ 2591 .offset = offsetof(struct ext4_sb_info, _elname),\ 2592 }, \ 2593 } 2594 2595 #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ 2596 static struct ext4_attr ext4_attr_##_name = { \ 2597 .attr = {.name = __stringify(_name), .mode = _mode }, \ 2598 .show = _show, \ 2599 .store = _store, \ 2600 .u = { \ 2601 .offset = offsetof(struct ext4_super_block, _elname), \ 2602 }, \ 2603 } 2604 2605 #define EXT4_ATTR(name, mode, show, store) \ 2606 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2607 2608 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2609 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2610 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2611 2612 #define EXT4_RO_ATTR_ES_UI(name, elname) \ 2613 EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) 2614 #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2615 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2616 2617 #define ATTR_LIST(name) &ext4_attr_##name.attr 2618 #define EXT4_DEPRECATED_ATTR(_name, _val) \ 2619 static struct ext4_attr ext4_attr_##_name = { \ 2620 .attr = {.name = __stringify(_name), .mode = 0444 }, \ 2621 .show = sbi_deprecated_show, \ 2622 .u = { \ 2623 .deprecated_val = _val, \ 2624 }, \ 2625 } 2626 2627 EXT4_RO_ATTR(delayed_allocation_blocks); 2628 EXT4_RO_ATTR(session_write_kbytes); 2629 EXT4_RO_ATTR(lifetime_write_kbytes); 2630 EXT4_RW_ATTR(reserved_clusters); 2631 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2632 inode_readahead_blks_store, s_inode_readahead_blks); 2633 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 2634 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2635 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2636 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2637 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); 2638 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2639 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2640 EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); 2641 EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); 2642 EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2643 EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); 2644 EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); 2645 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); 2646 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); 2647 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); 2648 
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 2649 EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); 2650 EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); 2651 EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); 2652 2653 static struct attribute *ext4_attrs[] = { 2654 ATTR_LIST(delayed_allocation_blocks), 2655 ATTR_LIST(session_write_kbytes), 2656 ATTR_LIST(lifetime_write_kbytes), 2657 ATTR_LIST(reserved_clusters), 2658 ATTR_LIST(inode_readahead_blks), 2659 ATTR_LIST(inode_goal), 2660 ATTR_LIST(mb_stats), 2661 ATTR_LIST(mb_max_to_scan), 2662 ATTR_LIST(mb_min_to_scan), 2663 ATTR_LIST(mb_order2_req), 2664 ATTR_LIST(mb_stream_req), 2665 ATTR_LIST(mb_group_prealloc), 2666 ATTR_LIST(max_writeback_mb_bump), 2667 ATTR_LIST(extent_max_zeroout_kb), 2668 ATTR_LIST(trigger_fs_error), 2669 ATTR_LIST(err_ratelimit_interval_ms), 2670 ATTR_LIST(err_ratelimit_burst), 2671 ATTR_LIST(warning_ratelimit_interval_ms), 2672 ATTR_LIST(warning_ratelimit_burst), 2673 ATTR_LIST(msg_ratelimit_interval_ms), 2674 ATTR_LIST(msg_ratelimit_burst), 2675 ATTR_LIST(errors_count), 2676 ATTR_LIST(first_error_time), 2677 ATTR_LIST(last_error_time), 2678 NULL, 2679 }; 2680 2681 /* Features this copy of ext4 supports */ 2682 EXT4_INFO_ATTR(lazy_itable_init); 2683 EXT4_INFO_ATTR(batched_discard); 2684 EXT4_INFO_ATTR(meta_bg_resize); 2685 EXT4_INFO_ATTR(encryption); 2686 2687 static struct attribute *ext4_feat_attrs[] = { 2688 ATTR_LIST(lazy_itable_init), 2689 ATTR_LIST(batched_discard), 2690 ATTR_LIST(meta_bg_resize), 2691 ATTR_LIST(encryption), 2692 NULL, 2693 }; 2694 2695 static ssize_t ext4_attr_show(struct kobject *kobj, 2696 struct attribute *attr, char *buf) 2697 { 2698 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2699 s_kobj); 2700 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2701 2702 return a->show ? a->show(a, sbi, buf) : 0; 2703 } 2704 2705 static ssize_t ext4_attr_store(struct kobject *kobj, 2706 struct attribute *attr, 2707 const char *buf, size_t len) 2708 { 2709 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2710 s_kobj); 2711 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); 2712 2713 return a->store ? a->store(a, sbi, buf, len) : 0; 2714 } 2715 2716 static void ext4_sb_release(struct kobject *kobj) 2717 { 2718 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, 2719 s_kobj); 2720 complete(&sbi->s_kobj_unregister); 2721 } 2722 2723 static const struct sysfs_ops ext4_attr_ops = { 2724 .show = ext4_attr_show, 2725 .store = ext4_attr_store, 2726 }; 2727 2728 static struct kobj_type ext4_ktype = { 2729 .default_attrs = ext4_attrs, 2730 .sysfs_ops = &ext4_attr_ops, 2731 .release = ext4_sb_release, 2732 }; 2733 2734 static void ext4_feat_release(struct kobject *kobj) 2735 { 2736 complete(&ext4_feat->f_kobj_unregister); 2737 } 2738 2739 static ssize_t ext4_feat_show(struct kobject *kobj, 2740 struct attribute *attr, char *buf) 2741 { 2742 return snprintf(buf, PAGE_SIZE, "supported\n"); 2743 } 2744 2745 /* 2746 * We can not use ext4_attr_show/store because it relies on the kobject 2747 * being embedded in the ext4_sb_info structure which is definitely not 2748 * true in this case. 
2749 */ 2750 static const struct sysfs_ops ext4_feat_ops = { 2751 .show = ext4_feat_show, 2752 .store = NULL, 2753 }; 2754 2755 static struct kobj_type ext4_feat_ktype = { 2756 .default_attrs = ext4_feat_attrs, 2757 .sysfs_ops = &ext4_feat_ops, 2758 .release = ext4_feat_release, 2759 }; 2760 2761 /* 2762 * Check whether this filesystem can be mounted based on 2763 * the features present and the RDONLY/RDWR mount requested. 2764 * Returns 1 if this filesystem can be mounted as requested, 2765 * 0 if it cannot be. 2766 */ 2767 static int ext4_feature_set_ok(struct super_block *sb, int readonly) 2768 { 2769 if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { 2770 ext4_msg(sb, KERN_ERR, 2771 "Couldn't mount because of " 2772 "unsupported optional features (%x)", 2773 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & 2774 ~EXT4_FEATURE_INCOMPAT_SUPP)); 2775 return 0; 2776 } 2777 2778 if (readonly) 2779 return 1; 2780 2781 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) { 2782 ext4_msg(sb, KERN_INFO, "filesystem is read-only"); 2783 sb->s_flags |= MS_RDONLY; 2784 return 1; 2785 } 2786 2787 /* Check that feature set is OK for a read-write mount */ 2788 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { 2789 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " 2790 "unsupported optional features (%x)", 2791 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & 2792 ~EXT4_FEATURE_RO_COMPAT_SUPP)); 2793 return 0; 2794 } 2795 /* 2796 * Large file size enabled file system can only be mounted 2797 * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF 2798 */ 2799 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { 2800 if (sizeof(blkcnt_t) < sizeof(u64)) { 2801 ext4_msg(sb, KERN_ERR, "Filesystem with huge files " 2802 "cannot be mounted RDWR without " 2803 "CONFIG_LBDAF"); 2804 return 0; 2805 } 2806 } 2807 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && 2808 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2809 ext4_msg(sb, KERN_ERR, 2810 "Can't support bigalloc feature without " 2811 "extents feature\n"); 2812 return 0; 2813 } 2814 2815 #ifndef CONFIG_QUOTA 2816 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 2817 !readonly) { 2818 ext4_msg(sb, KERN_ERR, 2819 "Filesystem with quota feature cannot be mounted RDWR " 2820 "without CONFIG_QUOTA"); 2821 return 0; 2822 } 2823 #endif /* CONFIG_QUOTA */ 2824 return 1; 2825 } 2826 2827 /* 2828 * This function is called once a day if we have errors logged 2829 * on the file system 2830 */ 2831 static void print_daily_error_info(unsigned long arg) 2832 { 2833 struct super_block *sb = (struct super_block *) arg; 2834 struct ext4_sb_info *sbi; 2835 struct ext4_super_block *es; 2836 2837 sbi = EXT4_SB(sb); 2838 es = sbi->s_es; 2839 2840 if (es->s_error_count) 2841 /* fsck newer than v1.41.13 is needed to clean this condition. 
 */
		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
			 le32_to_cpu(es->s_error_count));
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk("\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
		       sb->s_id, le32_to_cpu(es->s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk("\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}

/* Find next suitable group and run ext4_init_inode_table */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	ext4_group_t group, ngroups;
	struct super_block *sb;
	unsigned long timeout = 0;
	int ret = 0;

	sb = elr->lr_super;
	ngroups = EXT4_SB(sb)->s_groups_count;

	sb_start_write(sb);
	for (group = elr->lr_next_group; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	if (group >= ngroups)
		ret = 1;

	if (!ret) {
		timeout = jiffies;
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		if (elr->lr_timeout == 0) {
			timeout = (jiffies - timeout) *
				  elr->lr_sbi->s_li_wait_mult;
			elr->lr_timeout = timeout;
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}
	sb_end_write(sb);

	return ret;
}

/*
 * Remove lr_request from the list_request and free the
 * request structure. Should be called with li_list_mtx held
 */
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
	struct ext4_sb_info *sbi;

	if (!elr)
		return;

	sbi = elr->lr_sbi;

	list_del(&elr->lr_request);
	sbi->s_li_request = NULL;
	kfree(elr);
}

static void ext4_unregister_li_request(struct super_block *sb)
{
	mutex_lock(&ext4_li_mtx);
	if (!ext4_li_info) {
		mutex_unlock(&ext4_li_mtx);
		return;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
	mutex_unlock(&ext4_li_info->li_list_mtx);
	mutex_unlock(&ext4_li_mtx);
}

static struct task_struct *ext4_lazyinit_task;

/*
 * This is the function where the ext4lazyinit thread lives. It walks
 * through the request list searching for the next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_run_li_request) and keep track of the time spent in this
 * function.
 * Based on that time we compute the next schedule time of
 * the request. When the walk through the list is complete, compute the
 * next wakeup time and put the thread to sleep.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);

cont_thread:
	while (true) {
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}

		list_for_each_safe(pos, n, &eli->li_request_list) {
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			if (time_after_eq(jiffies, elr->lr_next_sched)) {
				if (ext4_run_li_request(elr) != 0) {
					/* error, remove the lazy_init job */
					ext4_remove_li_request(elr);
					continue;
				}
			}

			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * a new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}

static void ext4_clear_request_list(void)
{
	struct list_head *pos, *n;
	struct ext4_li_request *elr;

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
		elr = list_entry(pos, struct ext4_li_request,
				 lr_request);
		ext4_remove_li_request(elr);
	}
	mutex_unlock(&ext4_li_info->li_list_mtx);
}

static int ext4_run_lazyinit_thread(void)
{
	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
					 ext4_li_info, "ext4lazyinit");
	if (IS_ERR(ext4_lazyinit_task)) {
		int err = PTR_ERR(ext4_lazyinit_task);
		ext4_clear_request_list();
		kfree(ext4_li_info);
		ext4_li_info = NULL;
		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
				 "initialization thread\n",
				 err);
		return err;
	}
	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
	return 0;
}

/*
 * Check whether it makes sense to run the itable init thread or not.
 * If there is at least one uninitialized inode table, return the
 * corresponding group number; otherwise the loop runs through all
 * groups and we return the total number of groups.
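 * For example, if groups 0-3 have EXT4_BG_INODE_ZEROED set but group 4
 * does not, this returns 4; if every group is initialized, it returns
 * ngroups.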
3076 */ 3077 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) 3078 { 3079 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; 3080 struct ext4_group_desc *gdp = NULL; 3081 3082 for (group = 0; group < ngroups; group++) { 3083 gdp = ext4_get_group_desc(sb, group, NULL); 3084 if (!gdp) 3085 continue; 3086 3087 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) 3088 break; 3089 } 3090 3091 return group; 3092 } 3093 3094 static int ext4_li_info_new(void) 3095 { 3096 struct ext4_lazy_init *eli = NULL; 3097 3098 eli = kzalloc(sizeof(*eli), GFP_KERNEL); 3099 if (!eli) 3100 return -ENOMEM; 3101 3102 INIT_LIST_HEAD(&eli->li_request_list); 3103 mutex_init(&eli->li_list_mtx); 3104 3105 eli->li_state |= EXT4_LAZYINIT_QUIT; 3106 3107 ext4_li_info = eli; 3108 3109 return 0; 3110 } 3111 3112 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, 3113 ext4_group_t start) 3114 { 3115 struct ext4_sb_info *sbi = EXT4_SB(sb); 3116 struct ext4_li_request *elr; 3117 3118 elr = kzalloc(sizeof(*elr), GFP_KERNEL); 3119 if (!elr) 3120 return NULL; 3121 3122 elr->lr_super = sb; 3123 elr->lr_sbi = sbi; 3124 elr->lr_next_group = start; 3125 3126 /* 3127 * Randomize first schedule time of the request to 3128 * spread the inode table initialization requests 3129 * better. 3130 */ 3131 elr->lr_next_sched = jiffies + (prandom_u32() % 3132 (EXT4_DEF_LI_MAX_START_DELAY * HZ)); 3133 return elr; 3134 } 3135 3136 int ext4_register_li_request(struct super_block *sb, 3137 ext4_group_t first_not_zeroed) 3138 { 3139 struct ext4_sb_info *sbi = EXT4_SB(sb); 3140 struct ext4_li_request *elr = NULL; 3141 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 3142 int ret = 0; 3143 3144 mutex_lock(&ext4_li_mtx); 3145 if (sbi->s_li_request != NULL) { 3146 /* 3147 * Reset timeout so it can be computed again, because 3148 * s_li_wait_mult might have changed. 3149 */ 3150 sbi->s_li_request->lr_timeout = 0; 3151 goto out; 3152 } 3153 3154 if (first_not_zeroed == ngroups || 3155 (sb->s_flags & MS_RDONLY) || 3156 !test_opt(sb, INIT_INODE_TABLE)) 3157 goto out; 3158 3159 elr = ext4_li_request_new(sb, first_not_zeroed); 3160 if (!elr) { 3161 ret = -ENOMEM; 3162 goto out; 3163 } 3164 3165 if (NULL == ext4_li_info) { 3166 ret = ext4_li_info_new(); 3167 if (ret) 3168 goto out; 3169 } 3170 3171 mutex_lock(&ext4_li_info->li_list_mtx); 3172 list_add(&elr->lr_request, &ext4_li_info->li_request_list); 3173 mutex_unlock(&ext4_li_info->li_list_mtx); 3174 3175 sbi->s_li_request = elr; 3176 /* 3177 * set elr to NULL here since it has been inserted to 3178 * the request_list and the removal and free of it is 3179 * handled by ext4_clear_request_list from now on. 3180 */ 3181 elr = NULL; 3182 3183 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3184 ret = ext4_run_lazyinit_thread(); 3185 if (ret) 3186 goto out; 3187 } 3188 out: 3189 mutex_unlock(&ext4_li_mtx); 3190 if (ret) 3191 kfree(elr); 3192 return ret; 3193 } 3194 3195 /* 3196 * We do not need to lock anything since this is called on 3197 * module unload. 3198 */ 3199 static void ext4_destroy_lazyinit_thread(void) 3200 { 3201 /* 3202 * If thread exited earlier 3203 * there's nothing to be done. 
	 */
	if (!ext4_li_info || !ext4_lazyinit_task)
		return;

	kthread_stop(ext4_lazyinit_task);
}

static int set_journal_csum_feature_set(struct super_block *sb)
{
	int ret = 1;
	int compat, incompat;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (ext4_has_metadata_csum(sb)) {
		/* journal checksum v3 */
		compat = 0;
		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
	} else {
		/* journal checksum v1 */
		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
		incompat = 0;
	}

	jbd2_journal_clear_features(sbi->s_journal,
			JBD2_FEATURE_COMPAT_CHECKSUM, 0,
			JBD2_FEATURE_INCOMPAT_CSUM_V3 |
			JBD2_FEATURE_INCOMPAT_CSUM_V2);
	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
				incompat);
	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
		ret = jbd2_journal_set_features(sbi->s_journal,
				compat, 0,
				incompat);
		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	} else {
		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
	}

	return ret;
}

/*
 * Note: calculating the overhead so we can be compatible with
 * historical BSD practice is quite difficult in the face of
 * clusters/bigalloc.  This is because multiple metadata blocks from
 * different block groups can end up in the same allocation cluster.
 * Calculating the exact overhead in the face of clustered allocation
 * requires either O(all block bitmaps) in memory or O(number of block
 * groups**2) in time.  We will still calculate the overhead for
 * older file systems --- and if we come across a bigalloc file
 * system with zero in s_overhead_clusters the estimate will be close
 * to correct, especially for very large cluster sizes --- but for
 * newer file systems, it's better to calculate this figure once at
 * mkfs time and store it in the superblock.  If the superblock value
 * is present (even for non-bigalloc file systems), we will use it.
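 * (For scale: a 16 TiB filesystem with 4 KiB blocks has 2^17 = 131072
 * block groups, so an O(groups**2) pass would be on the order of 10^10
 * steps; hence the preference for the mkfs-time value.)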
3264 */ 3265 static int count_overhead(struct super_block *sb, ext4_group_t grp, 3266 char *buf) 3267 { 3268 struct ext4_sb_info *sbi = EXT4_SB(sb); 3269 struct ext4_group_desc *gdp; 3270 ext4_fsblk_t first_block, last_block, b; 3271 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3272 int s, j, count = 0; 3273 3274 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) 3275 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + 3276 sbi->s_itb_per_group + 2); 3277 3278 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + 3279 (grp * EXT4_BLOCKS_PER_GROUP(sb)); 3280 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; 3281 for (i = 0; i < ngroups; i++) { 3282 gdp = ext4_get_group_desc(sb, i, NULL); 3283 b = ext4_block_bitmap(sb, gdp); 3284 if (b >= first_block && b <= last_block) { 3285 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); 3286 count++; 3287 } 3288 b = ext4_inode_bitmap(sb, gdp); 3289 if (b >= first_block && b <= last_block) { 3290 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); 3291 count++; 3292 } 3293 b = ext4_inode_table(sb, gdp); 3294 if (b >= first_block && b + sbi->s_itb_per_group <= last_block) 3295 for (j = 0; j < sbi->s_itb_per_group; j++, b++) { 3296 int c = EXT4_B2C(sbi, b - first_block); 3297 ext4_set_bit(c, buf); 3298 count++; 3299 } 3300 if (i != grp) 3301 continue; 3302 s = 0; 3303 if (ext4_bg_has_super(sb, grp)) { 3304 ext4_set_bit(s++, buf); 3305 count++; 3306 } 3307 for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { 3308 ext4_set_bit(EXT4_B2C(sbi, s++), buf); 3309 count++; 3310 } 3311 } 3312 if (!count) 3313 return 0; 3314 return EXT4_CLUSTERS_PER_GROUP(sb) - 3315 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); 3316 } 3317 3318 /* 3319 * Compute the overhead and stash it in sbi->s_overhead 3320 */ 3321 int ext4_calculate_overhead(struct super_block *sb) 3322 { 3323 struct ext4_sb_info *sbi = EXT4_SB(sb); 3324 struct ext4_super_block *es = sbi->s_es; 3325 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3326 ext4_fsblk_t overhead = 0; 3327 char *buf = (char *) get_zeroed_page(GFP_NOFS); 3328 3329 if (!buf) 3330 return -ENOMEM; 3331 3332 /* 3333 * Compute the overhead (FS structures). This is constant 3334 * for a given filesystem unless the number of block groups 3335 * changes so we cache the previous value until it does. 3336 */ 3337 3338 /* 3339 * All of the blocks before first_data_block are overhead 3340 */ 3341 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); 3342 3343 /* 3344 * Add the overhead found in each block group 3345 */ 3346 for (i = 0; i < ngroups; i++) { 3347 int blks; 3348 3349 blks = count_overhead(sb, i, buf); 3350 overhead += blks; 3351 if (blks) 3352 memset(buf, 0, PAGE_SIZE); 3353 cond_resched(); 3354 } 3355 /* Add the internal journal blocks as well */ 3356 if (sbi->s_journal && !sbi->journal_bdev) 3357 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); 3358 3359 sbi->s_overhead = overhead; 3360 smp_wmb(); 3361 free_page((unsigned long) buf); 3362 return 0; 3363 } 3364 3365 3366 static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) 3367 { 3368 ext4_fsblk_t resv_clusters; 3369 3370 /* 3371 * There's no need to reserve anything when we aren't using extents. 3372 * The space estimates are exact, there are no unwritten extents, 3373 * hole punching doesn't need new metadata... This is needed especially 3374 * to keep ext2/3 backward compatibility. 
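 * (To make the sizing below concrete: a 1 TiB filesystem with 4 KiB
 * clusters has 2^28 clusters; 2% of that is about 5.3 million, so the
 * reservation is clamped to the 4096-cluster cap, i.e. 16 MiB.)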
	 */
	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
		return 0;
	/*
	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
	 * This should cover the situations where we cannot afford to run
	 * out of space, such as a punch hole or converting unwritten
	 * extents in the delalloc path. In most cases such an allocation
	 * requires only one or two blocks; higher numbers are very rare.
	 */
	resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
			EXT4_SB(sb)->s_cluster_bits;

	do_div(resv_clusters, 50);
	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);

	return resv_clusters;
}


static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
{
	ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
				sbi->s_cluster_bits;

	if (count >= clusters)
		return -EINVAL;

	atomic64_set(&sbi->s_resv_clusters, count);
	return 0;
}

static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
	char *orig_data = kstrdup(data, GFP_KERNEL);
	struct buffer_head *bh;
	struct ext4_super_block *es = NULL;
	struct ext4_sb_info *sbi;
	ext4_fsblk_t block;
	ext4_fsblk_t sb_block = get_sb_block(&data);
	ext4_fsblk_t logical_sb_block;
	unsigned long offset = 0;
	unsigned long journal_devnum = 0;
	unsigned long def_mount_opts;
	struct inode *root;
	char *cp;
	const char *descr;
	int ret = -ENOMEM;
	int blocksize, clustersize;
	unsigned int db_count;
	unsigned int i;
	int needs_recovery, has_huge_files, has_bigalloc;
	__u64 blocks_count;
	int err = 0;
	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
	ext4_group_t first_not_zeroed;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		goto out_free_orig;

	sbi->s_blockgroup_lock =
		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
	if (!sbi->s_blockgroup_lock) {
		kfree(sbi);
		goto out_free_orig;
	}
	sb->s_fs_info = sbi;
	sbi->s_sb = sb;
	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
	sbi->s_sb_block = sb_block;
	if (sb->s_bdev->bd_part)
		sbi->s_sectors_written_start =
			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	/* Modes of operation for file and directory encryption. */
	sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
	sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
#endif

	/* Cleanup superblock name */
	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
		*cp = '!';

	/* -EINVAL is default */
	ret = -EINVAL;
	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
	if (!blocksize) {
		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
		goto out_fail;
	}

	/*
	 * The ext4 superblock will not be buffer aligned for block sizes
	 * other than 1 kB. We need to calculate the offset from buffer start.
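	 * For the default sb_block of 1 with a 4 KiB device block size,
	 * logical_sb_block = 1 * 1024 / 4096 = block 0, and the superblock
	 * starts at byte offset 1024 within that buffer.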
3471 */ 3472 if (blocksize != EXT4_MIN_BLOCK_SIZE) { 3473 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3474 offset = do_div(logical_sb_block, blocksize); 3475 } else { 3476 logical_sb_block = sb_block; 3477 } 3478 3479 if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { 3480 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3481 goto out_fail; 3482 } 3483 /* 3484 * Note: s_es must be initialized as soon as possible because 3485 * some ext4 macro-instructions depend on its value 3486 */ 3487 es = (struct ext4_super_block *) (bh->b_data + offset); 3488 sbi->s_es = es; 3489 sb->s_magic = le16_to_cpu(es->s_magic); 3490 if (sb->s_magic != EXT4_SUPER_MAGIC) 3491 goto cantfind_ext4; 3492 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3493 3494 /* Warn if metadata_csum and gdt_csum are both set. */ 3495 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3496 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 3497 EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 3498 ext4_warning(sb, "metadata_csum and uninit_bg are " 3499 "redundant flags; please run fsck."); 3500 3501 /* Check for a known checksum algorithm */ 3502 if (!ext4_verify_csum_type(sb, es)) { 3503 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3504 "unknown checksum algorithm."); 3505 silent = 1; 3506 goto cantfind_ext4; 3507 } 3508 3509 /* Load the checksum driver */ 3510 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3511 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3512 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3513 if (IS_ERR(sbi->s_chksum_driver)) { 3514 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3515 ret = PTR_ERR(sbi->s_chksum_driver); 3516 sbi->s_chksum_driver = NULL; 3517 goto failed_mount; 3518 } 3519 } 3520 3521 /* Check superblock checksum */ 3522 if (!ext4_superblock_csum_verify(sb, es)) { 3523 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3524 "invalid superblock checksum. Run e2fsck?"); 3525 silent = 1; 3526 goto cantfind_ext4; 3527 } 3528 3529 /* Precompute checksum seed for all metadata */ 3530 if (ext4_has_metadata_csum(sb)) 3531 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3532 sizeof(es->s_uuid)); 3533 3534 /* Set defaults before we parse the mount options */ 3535 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 3536 set_opt(sb, INIT_INODE_TABLE); 3537 if (def_mount_opts & EXT4_DEFM_DEBUG) 3538 set_opt(sb, DEBUG); 3539 if (def_mount_opts & EXT4_DEFM_BSDGROUPS) 3540 set_opt(sb, GRPID); 3541 if (def_mount_opts & EXT4_DEFM_UID16) 3542 set_opt(sb, NO_UID32); 3543 /* xattr user namespace & acls are now defaulted on */ 3544 set_opt(sb, XATTR_USER); 3545 #ifdef CONFIG_EXT4_FS_POSIX_ACL 3546 set_opt(sb, POSIX_ACL); 3547 #endif 3548 /* don't forget to enable journal_csum when metadata_csum is enabled. 
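	 * Otherwise the journal's own blocks would lack the checksum
	 * coverage that metadata_csum promises for all other metadata
	 * (see set_journal_csum_feature_set() above, which picks
	 * JBD2_FEATURE_INCOMPAT_CSUM_V3 in that case).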
*/ 3549 if (ext4_has_metadata_csum(sb)) 3550 set_opt(sb, JOURNAL_CHECKSUM); 3551 3552 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3553 set_opt(sb, JOURNAL_DATA); 3554 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3555 set_opt(sb, ORDERED_DATA); 3556 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) 3557 set_opt(sb, WRITEBACK_DATA); 3558 3559 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) 3560 set_opt(sb, ERRORS_PANIC); 3561 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) 3562 set_opt(sb, ERRORS_CONT); 3563 else 3564 set_opt(sb, ERRORS_RO); 3565 /* block_validity enabled by default; disable with noblock_validity */ 3566 set_opt(sb, BLOCK_VALIDITY); 3567 if (def_mount_opts & EXT4_DEFM_DISCARD) 3568 set_opt(sb, DISCARD); 3569 3570 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); 3571 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); 3572 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 3573 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 3574 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 3575 3576 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) 3577 set_opt(sb, BARRIER); 3578 3579 /* 3580 * enable delayed allocation by default 3581 * Use -o nodelalloc to turn it off 3582 */ 3583 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && 3584 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3585 set_opt(sb, DELALLOC); 3586 3587 /* 3588 * set default s_li_wait_mult for lazyinit, for the case there is 3589 * no mount option specified. 3590 */ 3591 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; 3592 3593 if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 3594 &journal_devnum, &journal_ioprio, 0)) { 3595 ext4_msg(sb, KERN_WARNING, 3596 "failed to parse options in superblock: %s", 3597 sbi->s_es->s_mount_opts); 3598 } 3599 sbi->s_def_mount_opt = sbi->s_mount_opt; 3600 if (!parse_options((char *) data, sb, &journal_devnum, 3601 &journal_ioprio, 0)) 3602 goto failed_mount; 3603 3604 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 3605 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " 3606 "with data=journal disables delayed " 3607 "allocation and O_DIRECT support!\n"); 3608 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 3609 ext4_msg(sb, KERN_ERR, "can't mount with " 3610 "both data=journal and delalloc"); 3611 goto failed_mount; 3612 } 3613 if (test_opt(sb, DIOREAD_NOLOCK)) { 3614 ext4_msg(sb, KERN_ERR, "can't mount with " 3615 "both data=journal and dioread_nolock"); 3616 goto failed_mount; 3617 } 3618 if (test_opt(sb, DAX)) { 3619 ext4_msg(sb, KERN_ERR, "can't mount with " 3620 "both data=journal and dax"); 3621 goto failed_mount; 3622 } 3623 if (test_opt(sb, DELALLOC)) 3624 clear_opt(sb, DELALLOC); 3625 } 3626 3627 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3628 (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); 3629 3630 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && 3631 (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || 3632 EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 3633 EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) 3634 ext4_msg(sb, KERN_WARNING, 3635 "feature flags set on rev 0 fs, " 3636 "running e2fsck is recommended"); 3637 3638 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { 3639 set_opt2(sb, HURD_COMPAT); 3640 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 3641 EXT4_FEATURE_INCOMPAT_64BIT)) { 3642 ext4_msg(sb, KERN_ERR, 3643 "The Hurd can't support 64-bit file systems"); 3644 goto failed_mount; 3645 } 3646 } 3647 3648 if (IS_EXT2_SB(sb)) { 3649 if (ext2_feature_set_ok(sb)) 3650 ext4_msg(sb, KERN_INFO, "mounting ext2 file system " 3651 "using the ext4 subsystem"); 3652 else { 3653 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " 3654 "to feature incompatibilities"); 3655 goto failed_mount; 3656 } 3657 } 3658 3659 if (IS_EXT3_SB(sb)) { 3660 if (ext3_feature_set_ok(sb)) 3661 ext4_msg(sb, KERN_INFO, "mounting ext3 file system " 3662 "using the ext4 subsystem"); 3663 else { 3664 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " 3665 "to feature incompatibilities"); 3666 goto failed_mount; 3667 } 3668 } 3669 3670 /* 3671 * Check feature flags regardless of the revision level, since we 3672 * previously didn't change the revision level when setting the flags, 3673 * so there is a chance incompat flags are set on a rev 0 filesystem. 3674 */ 3675 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3676 goto failed_mount; 3677 3678 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 3679 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3680 blocksize > EXT4_MAX_BLOCK_SIZE) { 3681 ext4_msg(sb, KERN_ERR, 3682 "Unsupported filesystem blocksize %d", blocksize); 3683 goto failed_mount; 3684 } 3685 3686 if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { 3687 if (blocksize != PAGE_SIZE) { 3688 ext4_msg(sb, KERN_ERR, 3689 "error: unsupported blocksize for dax"); 3690 goto failed_mount; 3691 } 3692 if (!sb->s_bdev->bd_disk->fops->direct_access) { 3693 ext4_msg(sb, KERN_ERR, 3694 "error: device does not support dax"); 3695 goto failed_mount; 3696 } 3697 } 3698 3699 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) && 3700 es->s_encryption_level) { 3701 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d", 3702 es->s_encryption_level); 3703 goto failed_mount; 3704 } 3705 3706 if (sb->s_blocksize != blocksize) { 3707 /* Validate the filesystem blocksize */ 3708 if (!sb_set_blocksize(sb, blocksize)) { 3709 ext4_msg(sb, KERN_ERR, "bad block size %d", 3710 blocksize); 3711 goto failed_mount; 3712 } 3713 3714 brelse(bh); 3715 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3716 offset = do_div(logical_sb_block, blocksize); 3717 bh = sb_bread_unmovable(sb, logical_sb_block); 3718 if (!bh) { 3719 ext4_msg(sb, KERN_ERR, 3720 "Can't read superblock on 2nd try"); 3721 goto failed_mount; 3722 } 3723 es = (struct ext4_super_block *)(bh->b_data + offset); 3724 sbi->s_es = es; 3725 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3726 ext4_msg(sb, KERN_ERR, 3727 "Magic mismatch, very weird!"); 3728 goto failed_mount; 3729 } 3730 } 3731 3732 has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3733 EXT4_FEATURE_RO_COMPAT_HUGE_FILE); 3734 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, 3735 has_huge_files); 3736 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); 3737 3738 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { 3739 sbi->s_inode_size = 
EXT4_GOOD_OLD_INODE_SIZE; 3740 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; 3741 } else { 3742 sbi->s_inode_size = le16_to_cpu(es->s_inode_size); 3743 sbi->s_first_ino = le32_to_cpu(es->s_first_ino); 3744 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 3745 (!is_power_of_2(sbi->s_inode_size)) || 3746 (sbi->s_inode_size > blocksize)) { 3747 ext4_msg(sb, KERN_ERR, 3748 "unsupported inode size: %d", 3749 sbi->s_inode_size); 3750 goto failed_mount; 3751 } 3752 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 3753 sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); 3754 } 3755 3756 sbi->s_desc_size = le16_to_cpu(es->s_desc_size); 3757 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { 3758 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || 3759 sbi->s_desc_size > EXT4_MAX_DESC_SIZE || 3760 !is_power_of_2(sbi->s_desc_size)) { 3761 ext4_msg(sb, KERN_ERR, 3762 "unsupported descriptor size %lu", 3763 sbi->s_desc_size); 3764 goto failed_mount; 3765 } 3766 } else 3767 sbi->s_desc_size = EXT4_MIN_DESC_SIZE; 3768 3769 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); 3770 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); 3771 if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) 3772 goto cantfind_ext4; 3773 3774 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); 3775 if (sbi->s_inodes_per_block == 0) 3776 goto cantfind_ext4; 3777 sbi->s_itb_per_group = sbi->s_inodes_per_group / 3778 sbi->s_inodes_per_block; 3779 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); 3780 sbi->s_sbh = bh; 3781 sbi->s_mount_state = le16_to_cpu(es->s_state); 3782 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3783 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3784 3785 for (i = 0; i < 4; i++) 3786 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3787 sbi->s_def_hash_version = es->s_def_hash_version; 3788 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { 3789 i = le32_to_cpu(es->s_flags); 3790 if (i & EXT2_FLAGS_UNSIGNED_HASH) 3791 sbi->s_hash_unsigned = 3; 3792 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { 3793 #ifdef __CHAR_UNSIGNED__ 3794 if (!(sb->s_flags & MS_RDONLY)) 3795 es->s_flags |= 3796 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); 3797 sbi->s_hash_unsigned = 3; 3798 #else 3799 if (!(sb->s_flags & MS_RDONLY)) 3800 es->s_flags |= 3801 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); 3802 #endif 3803 } 3804 } 3805 3806 /* Handle clustersize */ 3807 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); 3808 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, 3809 EXT4_FEATURE_RO_COMPAT_BIGALLOC); 3810 if (has_bigalloc) { 3811 if (clustersize < blocksize) { 3812 ext4_msg(sb, KERN_ERR, 3813 "cluster size (%d) smaller than " 3814 "block size (%d)", clustersize, blocksize); 3815 goto failed_mount; 3816 } 3817 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - 3818 le32_to_cpu(es->s_log_block_size); 3819 sbi->s_clusters_per_group = 3820 le32_to_cpu(es->s_clusters_per_group); 3821 if (sbi->s_clusters_per_group > blocksize * 8) { 3822 ext4_msg(sb, KERN_ERR, 3823 "#clusters per group too big: %lu", 3824 sbi->s_clusters_per_group); 3825 goto failed_mount; 3826 } 3827 if (sbi->s_blocks_per_group != 3828 (sbi->s_clusters_per_group * (clustersize / blocksize))) { 3829 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " 3830 "clusters per group (%lu) inconsistent", 3831 sbi->s_blocks_per_group, 3832 sbi->s_clusters_per_group); 3833 goto failed_mount; 3834 } 3835 } else { 3836 if (clustersize != blocksize) { 3837 
ext4_warning(sb, "fragment/cluster size (%d) != " 3838 "block size (%d)", clustersize, 3839 blocksize); 3840 clustersize = blocksize; 3841 } 3842 if (sbi->s_blocks_per_group > blocksize * 8) { 3843 ext4_msg(sb, KERN_ERR, 3844 "#blocks per group too big: %lu", 3845 sbi->s_blocks_per_group); 3846 goto failed_mount; 3847 } 3848 sbi->s_clusters_per_group = sbi->s_blocks_per_group; 3849 sbi->s_cluster_bits = 0; 3850 } 3851 sbi->s_cluster_ratio = clustersize / blocksize; 3852 3853 if (sbi->s_inodes_per_group > blocksize * 8) { 3854 ext4_msg(sb, KERN_ERR, 3855 "#inodes per group too big: %lu", 3856 sbi->s_inodes_per_group); 3857 goto failed_mount; 3858 } 3859 3860 /* Do we have standard group size of clustersize * 8 blocks ? */ 3861 if (sbi->s_blocks_per_group == clustersize << 3) 3862 set_opt2(sb, STD_GROUP_SIZE); 3863 3864 /* 3865 * Test whether we have more sectors than will fit in sector_t, 3866 * and whether the max offset is addressable by the page cache. 3867 */ 3868 err = generic_check_addressable(sb->s_blocksize_bits, 3869 ext4_blocks_count(es)); 3870 if (err) { 3871 ext4_msg(sb, KERN_ERR, "filesystem" 3872 " too large to mount safely on this system"); 3873 if (sizeof(sector_t) < 8) 3874 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3875 goto failed_mount; 3876 } 3877 3878 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 3879 goto cantfind_ext4; 3880 3881 /* check blocks count against device size */ 3882 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; 3883 if (blocks_count && ext4_blocks_count(es) > blocks_count) { 3884 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " 3885 "exceeds size of device (%llu blocks)", 3886 ext4_blocks_count(es), blocks_count); 3887 goto failed_mount; 3888 } 3889 3890 /* 3891 * It makes no sense for the first data block to be beyond the end 3892 * of the filesystem. 
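	 * (s_first_data_block is 1 on 1k-block filesystems, where the
	 * 1024-byte superblock lives in block 1; it is 0 for all larger
	 * block sizes.)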
3893 */ 3894 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3895 ext4_msg(sb, KERN_WARNING, "bad geometry: first data " 3896 "block %u is beyond end of filesystem (%llu)", 3897 le32_to_cpu(es->s_first_data_block), 3898 ext4_blocks_count(es)); 3899 goto failed_mount; 3900 } 3901 blocks_count = (ext4_blocks_count(es) - 3902 le32_to_cpu(es->s_first_data_block) + 3903 EXT4_BLOCKS_PER_GROUP(sb) - 1); 3904 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); 3905 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { 3906 ext4_msg(sb, KERN_WARNING, "groups count too large: %u " 3907 "(block count %llu, first data block %u, " 3908 "blocks per group %lu)", sbi->s_groups_count, 3909 ext4_blocks_count(es), 3910 le32_to_cpu(es->s_first_data_block), 3911 EXT4_BLOCKS_PER_GROUP(sb)); 3912 goto failed_mount; 3913 } 3914 sbi->s_groups_count = blocks_count; 3915 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, 3916 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3917 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3918 EXT4_DESC_PER_BLOCK(sb); 3919 sbi->s_group_desc = ext4_kvmalloc(db_count * 3920 sizeof(struct buffer_head *), 3921 GFP_KERNEL); 3922 if (sbi->s_group_desc == NULL) { 3923 ext4_msg(sb, KERN_ERR, "not enough memory"); 3924 ret = -ENOMEM; 3925 goto failed_mount; 3926 } 3927 3928 if (ext4_proc_root) 3929 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3930 3931 if (sbi->s_proc) 3932 proc_create_data("options", S_IRUGO, sbi->s_proc, 3933 &ext4_seq_options_fops, sb); 3934 3935 bgl_lock_init(sbi->s_blockgroup_lock); 3936 3937 for (i = 0; i < db_count; i++) { 3938 block = descriptor_loc(sb, logical_sb_block, i); 3939 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); 3940 if (!sbi->s_group_desc[i]) { 3941 ext4_msg(sb, KERN_ERR, 3942 "can't read group descriptor %d", i); 3943 db_count = i; 3944 goto failed_mount2; 3945 } 3946 } 3947 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3948 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3949 goto failed_mount2; 3950 } 3951 3952 sbi->s_gdb_count = db_count; 3953 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3954 spin_lock_init(&sbi->s_next_gen_lock); 3955 3956 setup_timer(&sbi->s_err_report, print_daily_error_info, 3957 (unsigned long) sb); 3958 3959 /* Register extent status tree shrinker */ 3960 if (ext4_es_register_shrinker(sbi)) 3961 goto failed_mount3; 3962 3963 sbi->s_stripe = ext4_get_stripe_size(sbi); 3964 sbi->s_extent_max_zeroout_kb = 32; 3965 3966 /* 3967 * set up enough so that it can read an inode 3968 */ 3969 sb->s_op = &ext4_sops; 3970 sb->s_export_op = &ext4_export_ops; 3971 sb->s_xattr = ext4_xattr_handlers; 3972 #ifdef CONFIG_QUOTA 3973 sb->dq_op = &ext4_quota_operations; 3974 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 3975 sb->s_qcop = &dquot_quotactl_sysfile_ops; 3976 else 3977 sb->s_qcop = &ext4_qctl_operations; 3978 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 3979 #endif 3980 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); 3981 3982 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3983 mutex_init(&sbi->s_orphan_lock); 3984 3985 sb->s_root = NULL; 3986 3987 needs_recovery = (es->s_last_orphan != 0 || 3988 EXT4_HAS_INCOMPAT_FEATURE(sb, 3989 EXT4_FEATURE_INCOMPAT_RECOVER)); 3990 3991 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3992 !(sb->s_flags & MS_RDONLY)) 3993 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3994 goto failed_mount3a; 3995 3996 /* 
3997 * The first inode we look at is the journal inode. Don't try 3998 * root first: it may be modified in the journal! 3999 */ 4000 if (!test_opt(sb, NOLOAD) && 4001 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 4002 if (ext4_load_journal(sb, es, journal_devnum)) 4003 goto failed_mount3a; 4004 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 4005 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 4006 ext4_msg(sb, KERN_ERR, "required journal recovery " 4007 "suppressed and not mounted read-only"); 4008 goto failed_mount_wq; 4009 } else { 4010 clear_opt(sb, DATA_FLAGS); 4011 sbi->s_journal = NULL; 4012 needs_recovery = 0; 4013 goto no_journal; 4014 } 4015 4016 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && 4017 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 4018 JBD2_FEATURE_INCOMPAT_64BIT)) { 4019 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 4020 goto failed_mount_wq; 4021 } 4022 4023 if (!set_journal_csum_feature_set(sb)) { 4024 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 4025 "feature set"); 4026 goto failed_mount_wq; 4027 } 4028 4029 /* We have now updated the journal if required, so we can 4030 * validate the data journaling mode. */ 4031 switch (test_opt(sb, DATA_FLAGS)) { 4032 case 0: 4033 /* No mode set, assume a default based on the journal 4034 * capabilities: ORDERED_DATA if the journal can 4035 * cope, else JOURNAL_DATA 4036 */ 4037 if (jbd2_journal_check_available_features 4038 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) 4039 set_opt(sb, ORDERED_DATA); 4040 else 4041 set_opt(sb, JOURNAL_DATA); 4042 break; 4043 4044 case EXT4_MOUNT_ORDERED_DATA: 4045 case EXT4_MOUNT_WRITEBACK_DATA: 4046 if (!jbd2_journal_check_available_features 4047 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { 4048 ext4_msg(sb, KERN_ERR, "Journal does not support " 4049 "requested data journaling mode"); 4050 goto failed_mount_wq; 4051 } 4052 default: 4053 break; 4054 } 4055 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4056 4057 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 4058 4059 no_journal: 4060 if (ext4_mballoc_ready) { 4061 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4062 if (!sbi->s_mb_cache) { 4063 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 4064 goto failed_mount_wq; 4065 } 4066 } 4067 4068 if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) && 4069 !(sb->s_flags & MS_RDONLY) && 4070 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) { 4071 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); 4072 ext4_commit_super(sb, 1); 4073 } 4074 4075 /* 4076 * Get the # of file system overhead blocks from the 4077 * superblock if present. 4078 */ 4079 if (es->s_overhead_clusters) 4080 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); 4081 else { 4082 err = ext4_calculate_overhead(sb); 4083 if (err) 4084 goto failed_mount_wq; 4085 } 4086 4087 /* 4088 * The maximum number of concurrent works can be high and 4089 * concurrency isn't really necessary. Limit it to 1. 4090 */ 4091 EXT4_SB(sb)->rsv_conversion_wq = 4092 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 4093 if (!EXT4_SB(sb)->rsv_conversion_wq) { 4094 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); 4095 ret = -ENOMEM; 4096 goto failed_mount4; 4097 } 4098 4099 /* 4100 * The jbd2_journal_load will have done any necessary log recovery, 4101 * so we can safely mount the rest of the filesystem now. 
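	 * (Recovery replays transactions that committed before the crash and
	 * discards anything uncommitted, so the metadata we read from here on
	 * is consistent.)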
 */

	root = ext4_iget(sb, EXT4_ROOT_INO);
	if (IS_ERR(root)) {
		ext4_msg(sb, KERN_ERR, "get root inode failed");
		ret = PTR_ERR(root);
		root = NULL;
		goto failed_mount4;
	}
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
		iput(root);
		goto failed_mount4;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		ext4_msg(sb, KERN_ERR, "get root dentry failed");
		ret = -ENOMEM;
		goto failed_mount4;
	}

	if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
		sb->s_flags |= MS_RDONLY;

	/* determine the minimum size of new large inodes, if present */
	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
						     EXT4_GOOD_OLD_INODE_SIZE;
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
			if (sbi->s_want_extra_isize <
			    le16_to_cpu(es->s_want_extra_isize))
				sbi->s_want_extra_isize =
					le16_to_cpu(es->s_want_extra_isize);
			if (sbi->s_want_extra_isize <
			    le16_to_cpu(es->s_min_extra_isize))
				sbi->s_want_extra_isize =
					le16_to_cpu(es->s_min_extra_isize);
		}
	}
	/* Check if enough inode space is available */
	if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
							sbi->s_inode_size) {
		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
						       EXT4_GOOD_OLD_INODE_SIZE;
		ext4_msg(sb, KERN_INFO, "required extra inode space not "
			 "available");
	}

	err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
	if (err) {
		ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
			 "reserved pool", ext4_calculate_resv_clusters(sb));
		goto failed_mount4a;
	}

	err = ext4_setup_system_zone(sb);
	if (err) {
		ext4_msg(sb, KERN_ERR, "failed to initialize system "
			 "zone (%d)", err);
		goto failed_mount4a;
	}

	ext4_ext_init(sb);
	err = ext4_mb_init(sb);
	if (err) {
		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
			 err);
		goto failed_mount5;
	}

	block = ext4_count_free_clusters(sb);
	ext4_free_blocks_count_set(sbi->s_es,
				   EXT4_C2B(sbi, block));
	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
				  GFP_KERNEL);
	if (!err) {
		unsigned long freei = ext4_count_free_inodes(sb);
		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
					  GFP_KERNEL);
	}
	if (!err)
		err = percpu_counter_init(&sbi->s_dirs_counter,
					  ext4_count_dirs(sb), GFP_KERNEL);
	if (!err)
		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
					  GFP_KERNEL);
	if (err) {
		ext4_msg(sb, KERN_ERR, "insufficient memory");
		goto failed_mount6;
	}

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
		if (!ext4_fill_flex_info(sb)) {
			ext4_msg(sb, KERN_ERR,
			       "unable to initialize "
			       "flex_bg meta info!");
			goto failed_mount6;
		}

	err = ext4_register_li_request(sb, first_not_zeroed);
	if (err)
		goto failed_mount6;

	sbi->s_kobj.kset = ext4_kset;
	init_completion(&sbi->s_kobj_unregister);
	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
				   "%s", sb->s_id);
	if (err)
		goto failed_mount7;

#ifdef CONFIG_QUOTA
	/* Enable quota usage during mount.
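	 * This needs to happen before the orphan cleanup below so that the
	 * space freed by truncating orphaned inodes is charged to the right
	 * dquots.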
*/ 4216 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && 4217 !(sb->s_flags & MS_RDONLY)) { 4218 err = ext4_enable_quotas(sb); 4219 if (err) 4220 goto failed_mount8; 4221 } 4222 #endif /* CONFIG_QUOTA */ 4223 4224 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 4225 ext4_orphan_cleanup(sb, es); 4226 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 4227 if (needs_recovery) { 4228 ext4_msg(sb, KERN_INFO, "recovery complete"); 4229 ext4_mark_recovery_complete(sb, es); 4230 } 4231 if (EXT4_SB(sb)->s_journal) { 4232 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 4233 descr = " journalled data mode"; 4234 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 4235 descr = " ordered data mode"; 4236 else 4237 descr = " writeback data mode"; 4238 } else 4239 descr = "out journal"; 4240 4241 if (test_opt(sb, DISCARD)) { 4242 struct request_queue *q = bdev_get_queue(sb->s_bdev); 4243 if (!blk_queue_discard(q)) 4244 ext4_msg(sb, KERN_WARNING, 4245 "mounting with \"discard\" option, but " 4246 "the device does not support discard"); 4247 } 4248 4249 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " 4250 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 4251 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 4252 4253 if (es->s_error_count) 4254 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 4255 4256 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ 4257 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); 4258 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); 4259 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); 4260 4261 kfree(orig_data); 4262 return 0; 4263 4264 cantfind_ext4: 4265 if (!silent) 4266 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 4267 goto failed_mount; 4268 4269 #ifdef CONFIG_QUOTA 4270 failed_mount8: 4271 kobject_del(&sbi->s_kobj); 4272 #endif 4273 failed_mount7: 4274 ext4_unregister_li_request(sb); 4275 failed_mount6: 4276 ext4_mb_release(sb); 4277 if (sbi->s_flex_groups) 4278 kvfree(sbi->s_flex_groups); 4279 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4280 percpu_counter_destroy(&sbi->s_freeinodes_counter); 4281 percpu_counter_destroy(&sbi->s_dirs_counter); 4282 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 4283 failed_mount5: 4284 ext4_ext_release(sb); 4285 ext4_release_system_zone(sb); 4286 failed_mount4a: 4287 dput(sb->s_root); 4288 sb->s_root = NULL; 4289 failed_mount4: 4290 ext4_msg(sb, KERN_ERR, "mount failed"); 4291 if (EXT4_SB(sb)->rsv_conversion_wq) 4292 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4293 failed_mount_wq: 4294 if (sbi->s_journal) { 4295 jbd2_journal_destroy(sbi->s_journal); 4296 sbi->s_journal = NULL; 4297 } 4298 failed_mount3a: 4299 ext4_es_unregister_shrinker(sbi); 4300 failed_mount3: 4301 del_timer_sync(&sbi->s_err_report); 4302 if (sbi->s_mmp_tsk) 4303 kthread_stop(sbi->s_mmp_tsk); 4304 failed_mount2: 4305 for (i = 0; i < db_count; i++) 4306 brelse(sbi->s_group_desc[i]); 4307 kvfree(sbi->s_group_desc); 4308 failed_mount: 4309 if (sbi->s_chksum_driver) 4310 crypto_free_shash(sbi->s_chksum_driver); 4311 if (sbi->s_proc) { 4312 remove_proc_entry("options", sbi->s_proc); 4313 remove_proc_entry(sb->s_id, ext4_proc_root); 4314 } 4315 #ifdef CONFIG_QUOTA 4316 for (i = 0; i < EXT4_MAXQUOTAS; i++) 4317 kfree(sbi->s_qf_names[i]); 4318 #endif 4319 ext4_blkdev_remove(sbi); 4320 brelse(bh); 4321 out_fail: 4322 sb->s_fs_info = NULL; 4323 kfree(sbi->s_blockgroup_lock); 4324 kfree(sbi); 4325 out_free_orig: 4326 
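	/* only the kstrdup()ed copy of the mount options is left to free */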
kfree(orig_data); 4327 return err ? err : ret; 4328 } 4329 4330 /* 4331 * Setup any per-fs journal parameters now. We'll do this both on 4332 * initial mount, once the journal has been initialised but before we've 4333 * done any recovery; and again on any subsequent remount. 4334 */ 4335 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 4336 { 4337 struct ext4_sb_info *sbi = EXT4_SB(sb); 4338 4339 journal->j_commit_interval = sbi->s_commit_interval; 4340 journal->j_min_batch_time = sbi->s_min_batch_time; 4341 journal->j_max_batch_time = sbi->s_max_batch_time; 4342 4343 write_lock(&journal->j_state_lock); 4344 if (test_opt(sb, BARRIER)) 4345 journal->j_flags |= JBD2_BARRIER; 4346 else 4347 journal->j_flags &= ~JBD2_BARRIER; 4348 if (test_opt(sb, DATA_ERR_ABORT)) 4349 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 4350 else 4351 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 4352 write_unlock(&journal->j_state_lock); 4353 } 4354 4355 static journal_t *ext4_get_journal(struct super_block *sb, 4356 unsigned int journal_inum) 4357 { 4358 struct inode *journal_inode; 4359 journal_t *journal; 4360 4361 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4362 4363 /* First, test for the existence of a valid inode on disk. Bad 4364 * things happen if we iget() an unused inode, as the subsequent 4365 * iput() will try to delete it. */ 4366 4367 journal_inode = ext4_iget(sb, journal_inum); 4368 if (IS_ERR(journal_inode)) { 4369 ext4_msg(sb, KERN_ERR, "no journal found"); 4370 return NULL; 4371 } 4372 if (!journal_inode->i_nlink) { 4373 make_bad_inode(journal_inode); 4374 iput(journal_inode); 4375 ext4_msg(sb, KERN_ERR, "journal inode is deleted"); 4376 return NULL; 4377 } 4378 4379 jbd_debug(2, "Journal inode found at %p: %lld bytes\n", 4380 journal_inode, journal_inode->i_size); 4381 if (!S_ISREG(journal_inode->i_mode)) { 4382 ext4_msg(sb, KERN_ERR, "invalid journal inode"); 4383 iput(journal_inode); 4384 return NULL; 4385 } 4386 4387 journal = jbd2_journal_init_inode(journal_inode); 4388 if (!journal) { 4389 ext4_msg(sb, KERN_ERR, "Could not load journal inode"); 4390 iput(journal_inode); 4391 return NULL; 4392 } 4393 journal->j_private = sb; 4394 ext4_init_journal_params(sb, journal); 4395 return journal; 4396 } 4397 4398 static journal_t *ext4_get_dev_journal(struct super_block *sb, 4399 dev_t j_dev) 4400 { 4401 struct buffer_head *bh; 4402 journal_t *journal; 4403 ext4_fsblk_t start; 4404 ext4_fsblk_t len; 4405 int hblock, blocksize; 4406 ext4_fsblk_t sb_block; 4407 unsigned long offset; 4408 struct ext4_super_block *es; 4409 struct block_device *bdev; 4410 4411 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4412 4413 bdev = ext4_blkdev_get(j_dev, sb); 4414 if (bdev == NULL) 4415 return NULL; 4416 4417 blocksize = sb->s_blocksize; 4418 hblock = bdev_logical_block_size(bdev); 4419 if (blocksize < hblock) { 4420 ext4_msg(sb, KERN_ERR, 4421 "blocksize too small for journal device"); 4422 goto out_bdev; 4423 } 4424 4425 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 4426 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 4427 set_blocksize(bdev, blocksize); 4428 if (!(bh = __bread(bdev, sb_block, blocksize))) { 4429 ext4_msg(sb, KERN_ERR, "couldn't read superblock of " 4430 "external journal"); 4431 goto out_bdev; 4432 } 4433 4434 es = (struct ext4_super_block *) (bh->b_data + offset); 4435 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 4436 !(le32_to_cpu(es->s_feature_incompat) & 4437 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { 4438 
ext4_msg(sb, KERN_ERR, "external journal has " 4439 "bad superblock"); 4440 brelse(bh); 4441 goto out_bdev; 4442 } 4443 4444 if ((le32_to_cpu(es->s_feature_ro_compat) & 4445 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 4446 es->s_checksum != ext4_superblock_csum(sb, es)) { 4447 ext4_msg(sb, KERN_ERR, "external journal has " 4448 "corrupt superblock"); 4449 brelse(bh); 4450 goto out_bdev; 4451 } 4452 4453 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 4454 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 4455 brelse(bh); 4456 goto out_bdev; 4457 } 4458 4459 len = ext4_blocks_count(es); 4460 start = sb_block + 1; 4461 brelse(bh); /* we're done with the superblock */ 4462 4463 journal = jbd2_journal_init_dev(bdev, sb->s_bdev, 4464 start, len, blocksize); 4465 if (!journal) { 4466 ext4_msg(sb, KERN_ERR, "failed to create device journal"); 4467 goto out_bdev; 4468 } 4469 journal->j_private = sb; 4470 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); 4471 wait_on_buffer(journal->j_sb_buffer); 4472 if (!buffer_uptodate(journal->j_sb_buffer)) { 4473 ext4_msg(sb, KERN_ERR, "I/O error on journal device"); 4474 goto out_journal; 4475 } 4476 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 4477 ext4_msg(sb, KERN_ERR, "External journal has more than one " 4478 "user (unsupported) - %d", 4479 be32_to_cpu(journal->j_superblock->s_nr_users)); 4480 goto out_journal; 4481 } 4482 EXT4_SB(sb)->journal_bdev = bdev; 4483 ext4_init_journal_params(sb, journal); 4484 return journal; 4485 4486 out_journal: 4487 jbd2_journal_destroy(journal); 4488 out_bdev: 4489 ext4_blkdev_put(bdev); 4490 return NULL; 4491 } 4492 4493 static int ext4_load_journal(struct super_block *sb, 4494 struct ext4_super_block *es, 4495 unsigned long journal_devnum) 4496 { 4497 journal_t *journal; 4498 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); 4499 dev_t journal_dev; 4500 int err = 0; 4501 int really_read_only; 4502 4503 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4504 4505 if (journal_devnum && 4506 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 4507 ext4_msg(sb, KERN_INFO, "external journal device major/minor " 4508 "numbers have changed"); 4509 journal_dev = new_decode_dev(journal_devnum); 4510 } else 4511 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 4512 4513 really_read_only = bdev_read_only(sb->s_bdev); 4514 4515 /* 4516 * Are we loading a blank journal or performing recovery after a 4517 * crash? For recovery, we need to check in advance whether we 4518 * can get read-write access to the device. 
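	 * (Journal replay writes to the block device even when the mount
	 * itself is read-only, which is why a physically write-protected
	 * device has to be rejected below.)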
4519 */ 4520 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 4521 if (sb->s_flags & MS_RDONLY) { 4522 ext4_msg(sb, KERN_INFO, "INFO: recovery " 4523 "required on readonly filesystem"); 4524 if (really_read_only) { 4525 ext4_msg(sb, KERN_ERR, "write access " 4526 "unavailable, cannot proceed"); 4527 return -EROFS; 4528 } 4529 ext4_msg(sb, KERN_INFO, "write access will " 4530 "be enabled during recovery"); 4531 } 4532 } 4533 4534 if (journal_inum && journal_dev) { 4535 ext4_msg(sb, KERN_ERR, "filesystem has both journal " 4536 "and inode journals!"); 4537 return -EINVAL; 4538 } 4539 4540 if (journal_inum) { 4541 if (!(journal = ext4_get_journal(sb, journal_inum))) 4542 return -EINVAL; 4543 } else { 4544 if (!(journal = ext4_get_dev_journal(sb, journal_dev))) 4545 return -EINVAL; 4546 } 4547 4548 if (!(journal->j_flags & JBD2_BARRIER)) 4549 ext4_msg(sb, KERN_INFO, "barriers disabled"); 4550 4551 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) 4552 err = jbd2_journal_wipe(journal, !really_read_only); 4553 if (!err) { 4554 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); 4555 if (save) 4556 memcpy(save, ((char *) es) + 4557 EXT4_S_ERR_START, EXT4_S_ERR_LEN); 4558 err = jbd2_journal_load(journal); 4559 if (save) 4560 memcpy(((char *) es) + EXT4_S_ERR_START, 4561 save, EXT4_S_ERR_LEN); 4562 kfree(save); 4563 } 4564 4565 if (err) { 4566 ext4_msg(sb, KERN_ERR, "error loading journal"); 4567 jbd2_journal_destroy(journal); 4568 return err; 4569 } 4570 4571 EXT4_SB(sb)->s_journal = journal; 4572 ext4_clear_journal_err(sb, es); 4573 4574 if (!really_read_only && journal_devnum && 4575 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 4576 es->s_journal_dev = cpu_to_le32(journal_devnum); 4577 4578 /* Make sure we flush the recovery flag to disk. */ 4579 ext4_commit_super(sb, 1); 4580 } 4581 4582 return 0; 4583 } 4584 4585 static int ext4_commit_super(struct super_block *sb, int sync) 4586 { 4587 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 4588 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4589 int error = 0; 4590 4591 if (!sbh) 4592 return error; 4593 if (buffer_write_io_error(sbh)) { 4594 /* 4595 * Oh, dear. A previous attempt to write the 4596 * superblock failed. This could happen because the 4597 * USB device was yanked out. Or it could happen to 4598 * be a transient write error and maybe the block will 4599 * be remapped. Nothing we can do but to retry the 4600 * write and hope for the best. 4601 */ 4602 ext4_msg(sb, KERN_ERR, "previous I/O error to " 4603 "superblock detected"); 4604 clear_buffer_write_io_error(sbh); 4605 set_buffer_uptodate(sbh); 4606 } 4607 /* 4608 * If the file system is mounted read-only, don't update the 4609 * superblock write time. This avoids updating the superblock 4610 * write time when we are mounting the root file system 4611 * read/only but we need to replay the journal; at that point, 4612 * for people who are east of GMT and who make their clock 4613 * tick in localtime for Windows bug-for-bug compatibility, 4614 * the clock is set in the future, and this will cause e2fsck 4615 * to complain and force a full file system check. 
4616 */ 4617 if (!(sb->s_flags & MS_RDONLY)) 4618 es->s_wtime = cpu_to_le32(get_seconds()); 4619 if (sb->s_bdev->bd_part) 4620 es->s_kbytes_written = 4621 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 4622 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 4623 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 4624 else 4625 es->s_kbytes_written = 4626 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4627 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) 4628 ext4_free_blocks_count_set(es, 4629 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( 4630 &EXT4_SB(sb)->s_freeclusters_counter))); 4631 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) 4632 es->s_free_inodes_count = 4633 cpu_to_le32(percpu_counter_sum_positive( 4634 &EXT4_SB(sb)->s_freeinodes_counter)); 4635 BUFFER_TRACE(sbh, "marking dirty"); 4636 ext4_superblock_csum_set(sb); 4637 mark_buffer_dirty(sbh); 4638 if (sync) { 4639 error = sync_dirty_buffer(sbh); 4640 if (error) 4641 return error; 4642 4643 error = buffer_write_io_error(sbh); 4644 if (error) { 4645 ext4_msg(sb, KERN_ERR, "I/O error while writing " 4646 "superblock"); 4647 clear_buffer_write_io_error(sbh); 4648 set_buffer_uptodate(sbh); 4649 } 4650 } 4651 return error; 4652 } 4653 4654 /* 4655 * Have we just finished recovery? If so, and if we are mounting (or 4656 * remounting) the filesystem readonly, then we will end up with a 4657 * consistent fs on disk. Record that fact. 4658 */ 4659 static void ext4_mark_recovery_complete(struct super_block *sb, 4660 struct ext4_super_block *es) 4661 { 4662 journal_t *journal = EXT4_SB(sb)->s_journal; 4663 4664 if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 4665 BUG_ON(journal != NULL); 4666 return; 4667 } 4668 jbd2_journal_lock_updates(journal); 4669 if (jbd2_journal_flush(journal) < 0) 4670 goto out; 4671 4672 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 4673 sb->s_flags & MS_RDONLY) { 4674 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4675 ext4_commit_super(sb, 1); 4676 } 4677 4678 out: 4679 jbd2_journal_unlock_updates(journal); 4680 } 4681 4682 /* 4683 * If we are mounting (or read-write remounting) a filesystem whose journal 4684 * has recorded an error from a previous lifetime, move that error to the 4685 * main filesystem now. 4686 */ 4687 static void ext4_clear_journal_err(struct super_block *sb, 4688 struct ext4_super_block *es) 4689 { 4690 journal_t *journal; 4691 int j_errno; 4692 const char *errstr; 4693 4694 BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); 4695 4696 journal = EXT4_SB(sb)->s_journal; 4697 4698 /* 4699 * Now check for any error status which may have been recorded in the 4700 * journal by a prior ext4_error() or ext4_abort() 4701 */ 4702 4703 j_errno = jbd2_journal_errno(journal); 4704 if (j_errno) { 4705 char nbuf[16]; 4706 4707 errstr = ext4_decode_error(sb, j_errno, nbuf); 4708 ext4_warning(sb, "Filesystem error recorded " 4709 "from previous mount: %s", errstr); 4710 ext4_warning(sb, "Marking fs in need of filesystem check."); 4711 4712 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 4713 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 4714 ext4_commit_super(sb, 1); 4715 4716 jbd2_journal_clear_err(journal); 4717 jbd2_journal_update_sb_errno(journal); 4718 } 4719 } 4720 4721 /* 4722 * Force the running and committing transactions to commit, 4723 * and wait on the commit. 
 */
int ext4_force_commit(struct super_block *sb)
{
	journal_t *journal;

	if (sb->s_flags & MS_RDONLY)
		return 0;

	journal = EXT4_SB(sb)->s_journal;
	return ext4_journal_force_commit(journal);
}

static int ext4_sync_fs(struct super_block *sb, int wait)
{
	int ret = 0;
	tid_t target;
	bool needs_barrier = false;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	trace_ext4_sync_fs(sb, wait);
	flush_workqueue(sbi->rsv_conversion_wq);
	/*
	 * Writeback quota in non-journalled quota case - journalled quota has
	 * no dirty dquots
	 */
	dquot_writeback_dquots(sb, -1);
	/*
	 * Data writeback is possible w/o journal transaction, so a barrier
	 * must be sent at the end of the function. But we can skip it if
	 * transaction_commit will do it for us.
	 */
	if (sbi->s_journal) {
		target = jbd2_get_latest_transaction(sbi->s_journal);
		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
			needs_barrier = true;

		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
			if (wait)
				ret = jbd2_log_wait_commit(sbi->s_journal,
							   target);
		}
	} else if (wait && test_opt(sb, BARRIER))
		needs_barrier = true;
	if (needs_barrier) {
		int err;
		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
		if (!ret)
			ret = err;
	}

	return ret;
}

/*
 * LVM calls this function before a (read-only) snapshot is created. This
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * Note that this function cannot bring the filesystem to a clean state on
 * its own; it relies on upper layers to stop all data and metadata
 * modifications.
 */
static int ext4_freeze(struct super_block *sb)
{
	int error = 0;
	journal_t *journal;

	if (sb->s_flags & MS_RDONLY)
		return 0;

	journal = EXT4_SB(sb)->s_journal;

	if (journal) {
		/* Now we set up the journal barrier. */
		jbd2_journal_lock_updates(journal);

		/*
		 * Don't clear the needs_recovery flag if we failed to
		 * flush the journal.
		 */
		error = jbd2_journal_flush(journal);
		if (error < 0)
			goto out;
	}

	/* Journal blocked and flushed, clear needs_recovery flag. */
	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
	error = ext4_commit_super(sb, 1);
out:
	if (journal)
		/* we rely on upper layer to stop further updates */
		jbd2_journal_unlock_updates(journal);
	return error;
}

/*
 * Called by LVM after the snapshot is done. We need to reset the RECOVER
 * flag here, even though the filesystem is not technically dirty yet.
 */
static int ext4_unfreeze(struct super_block *sb)
{
	if (sb->s_flags & MS_RDONLY)
		return 0;

	/* Reset the needs_recovery flag before the fs is unlocked.
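	 * ext4_freeze() cleared the flag so that a snapshot taken while the
	 * fs was frozen looks clean; once writes resume, a crash may again
	 * require journal recovery.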
*/ 4829 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4830 ext4_commit_super(sb, 1); 4831 return 0; 4832 } 4833 4834 /* 4835 * Structure to save mount options for ext4_remount's benefit 4836 */ 4837 struct ext4_mount_options { 4838 unsigned long s_mount_opt; 4839 unsigned long s_mount_opt2; 4840 kuid_t s_resuid; 4841 kgid_t s_resgid; 4842 unsigned long s_commit_interval; 4843 u32 s_min_batch_time, s_max_batch_time; 4844 #ifdef CONFIG_QUOTA 4845 int s_jquota_fmt; 4846 char *s_qf_names[EXT4_MAXQUOTAS]; 4847 #endif 4848 }; 4849 4850 static int ext4_remount(struct super_block *sb, int *flags, char *data) 4851 { 4852 struct ext4_super_block *es; 4853 struct ext4_sb_info *sbi = EXT4_SB(sb); 4854 unsigned long old_sb_flags; 4855 struct ext4_mount_options old_opts; 4856 int enable_quota = 0; 4857 ext4_group_t g; 4858 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4859 int err = 0; 4860 #ifdef CONFIG_QUOTA 4861 int i, j; 4862 #endif 4863 char *orig_data = kstrdup(data, GFP_KERNEL); 4864 4865 /* Store the original options */ 4866 old_sb_flags = sb->s_flags; 4867 old_opts.s_mount_opt = sbi->s_mount_opt; 4868 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4869 old_opts.s_resuid = sbi->s_resuid; 4870 old_opts.s_resgid = sbi->s_resgid; 4871 old_opts.s_commit_interval = sbi->s_commit_interval; 4872 old_opts.s_min_batch_time = sbi->s_min_batch_time; 4873 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4874 #ifdef CONFIG_QUOTA 4875 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4876 for (i = 0; i < EXT4_MAXQUOTAS; i++) 4877 if (sbi->s_qf_names[i]) { 4878 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 4879 GFP_KERNEL); 4880 if (!old_opts.s_qf_names[i]) { 4881 for (j = 0; j < i; j++) 4882 kfree(old_opts.s_qf_names[j]); 4883 kfree(orig_data); 4884 return -ENOMEM; 4885 } 4886 } else 4887 old_opts.s_qf_names[i] = NULL; 4888 #endif 4889 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4890 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4891 4892 if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { 4893 err = -EINVAL; 4894 goto restore_opts; 4895 } 4896 4897 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ 4898 test_opt(sb, JOURNAL_CHECKSUM)) { 4899 ext4_msg(sb, KERN_ERR, "changing journal_checksum " 4900 "during remount not supported; ignoring"); 4901 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM; 4902 } 4903 4904 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4905 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4906 ext4_msg(sb, KERN_ERR, "can't mount with " 4907 "both data=journal and delalloc"); 4908 err = -EINVAL; 4909 goto restore_opts; 4910 } 4911 if (test_opt(sb, DIOREAD_NOLOCK)) { 4912 ext4_msg(sb, KERN_ERR, "can't mount with " 4913 "both data=journal and dioread_nolock"); 4914 err = -EINVAL; 4915 goto restore_opts; 4916 } 4917 if (test_opt(sb, DAX)) { 4918 ext4_msg(sb, KERN_ERR, "can't mount with " 4919 "both data=journal and dax"); 4920 err = -EINVAL; 4921 goto restore_opts; 4922 } 4923 } 4924 4925 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { 4926 ext4_msg(sb, KERN_WARNING, "warning: refusing change of " 4927 "dax flag with busy inodes while remounting"); 4928 sbi->s_mount_opt ^= EXT4_MOUNT_DAX; 4929 } 4930 4931 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4932 ext4_abort(sb, "Abort forced by user"); 4933 4934 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 4935 (test_opt(sb, POSIX_ACL) ? 
MS_POSIXACL : 0); 4936 4937 es = sbi->s_es; 4938 4939 if (sbi->s_journal) { 4940 ext4_init_journal_params(sb, sbi->s_journal); 4941 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 4942 } 4943 4944 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 4945 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { 4946 err = -EROFS; 4947 goto restore_opts; 4948 } 4949 4950 if (*flags & MS_RDONLY) { 4951 err = sync_filesystem(sb); 4952 if (err < 0) 4953 goto restore_opts; 4954 err = dquot_suspend(sb, -1); 4955 if (err < 0) 4956 goto restore_opts; 4957 4958 /* 4959 * First of all, the unconditional stuff we have to do 4960 * to disable replay of the journal when we next remount 4961 */ 4962 sb->s_flags |= MS_RDONLY; 4963 4964 /* 4965 * OK, test if we are remounting a valid rw partition 4966 * readonly, and if so set the rdonly flag and then 4967 * mark the partition as valid again. 4968 */ 4969 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && 4970 (sbi->s_mount_state & EXT4_VALID_FS)) 4971 es->s_state = cpu_to_le16(sbi->s_mount_state); 4972 4973 if (sbi->s_journal) 4974 ext4_mark_recovery_complete(sb, es); 4975 } else { 4976 /* Make sure we can mount this feature set readwrite */ 4977 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4978 EXT4_FEATURE_RO_COMPAT_READONLY) || 4979 !ext4_feature_set_ok(sb, 0)) { 4980 err = -EROFS; 4981 goto restore_opts; 4982 } 4983 /* 4984 * Make sure the group descriptor checksums 4985 * are sane. If they aren't, refuse to remount r/w. 4986 */ 4987 for (g = 0; g < sbi->s_groups_count; g++) { 4988 struct ext4_group_desc *gdp = 4989 ext4_get_group_desc(sb, g, NULL); 4990 4991 if (!ext4_group_desc_csum_verify(sb, g, gdp)) { 4992 ext4_msg(sb, KERN_ERR, 4993 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4994 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), 4995 le16_to_cpu(gdp->bg_checksum)); 4996 err = -EINVAL; 4997 goto restore_opts; 4998 } 4999 } 5000 5001 /* 5002 * If we have an unprocessed orphan list hanging 5003 * around from a previously readonly bdev mount, 5004 * require a full umount/remount for now. 5005 */ 5006 if (es->s_last_orphan) { 5007 ext4_msg(sb, KERN_WARNING, "Couldn't " 5008 "remount RDWR because of unprocessed " 5009 "orphan inode list. Please " 5010 "umount/remount instead"); 5011 err = -EINVAL; 5012 goto restore_opts; 5013 } 5014 5015 /* 5016 * Mounting a RDONLY partition read-write, so reread 5017 * and store the current valid flag. (It may have 5018 * been changed by e2fsck since we originally mounted 5019 * the partition.) 
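			 * (s_state carries the EXT4_VALID_FS and
			 * EXT4_ERROR_FS bits, so this also picks up any
			 * error state e2fsck recorded or cleared.)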
5020 */ 5021 if (sbi->s_journal) 5022 ext4_clear_journal_err(sb, es); 5023 sbi->s_mount_state = le16_to_cpu(es->s_state); 5024 if (!ext4_setup_super(sb, es, 0)) 5025 sb->s_flags &= ~MS_RDONLY; 5026 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 5027 EXT4_FEATURE_INCOMPAT_MMP)) 5028 if (ext4_multi_mount_protect(sb, 5029 le64_to_cpu(es->s_mmp_block))) { 5030 err = -EROFS; 5031 goto restore_opts; 5032 } 5033 enable_quota = 1; 5034 } 5035 } 5036 5037 /* 5038 * Reinitialize lazy itable initialization thread based on 5039 * current settings 5040 */ 5041 if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) 5042 ext4_unregister_li_request(sb); 5043 else { 5044 ext4_group_t first_not_zeroed; 5045 first_not_zeroed = ext4_has_uninit_itable(sb); 5046 ext4_register_li_request(sb, first_not_zeroed); 5047 } 5048 5049 ext4_setup_system_zone(sb); 5050 if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) 5051 ext4_commit_super(sb, 1); 5052 5053 #ifdef CONFIG_QUOTA 5054 /* Release old quota file names */ 5055 for (i = 0; i < EXT4_MAXQUOTAS; i++) 5056 kfree(old_opts.s_qf_names[i]); 5057 if (enable_quota) { 5058 if (sb_any_quota_suspended(sb)) 5059 dquot_resume(sb, -1); 5060 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 5061 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 5062 err = ext4_enable_quotas(sb); 5063 if (err) 5064 goto restore_opts; 5065 } 5066 } 5067 #endif 5068 5069 *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); 5070 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); 5071 kfree(orig_data); 5072 return 0; 5073 5074 restore_opts: 5075 sb->s_flags = old_sb_flags; 5076 sbi->s_mount_opt = old_opts.s_mount_opt; 5077 sbi->s_mount_opt2 = old_opts.s_mount_opt2; 5078 sbi->s_resuid = old_opts.s_resuid; 5079 sbi->s_resgid = old_opts.s_resgid; 5080 sbi->s_commit_interval = old_opts.s_commit_interval; 5081 sbi->s_min_batch_time = old_opts.s_min_batch_time; 5082 sbi->s_max_batch_time = old_opts.s_max_batch_time; 5083 #ifdef CONFIG_QUOTA 5084 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 5085 for (i = 0; i < EXT4_MAXQUOTAS; i++) { 5086 kfree(sbi->s_qf_names[i]); 5087 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 5088 } 5089 #endif 5090 kfree(orig_data); 5091 return err; 5092 } 5093 5094 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 5095 { 5096 struct super_block *sb = dentry->d_sb; 5097 struct ext4_sb_info *sbi = EXT4_SB(sb); 5098 struct ext4_super_block *es = sbi->s_es; 5099 ext4_fsblk_t overhead = 0, resv_blocks; 5100 u64 fsid; 5101 s64 bfree; 5102 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); 5103 5104 if (!test_opt(sb, MINIX_DF)) 5105 overhead = sbi->s_overhead; 5106 5107 buf->f_type = EXT4_SUPER_MAGIC; 5108 buf->f_bsize = sb->s_blocksize; 5109 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead); 5110 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - 5111 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); 5112 /* prevent underflow in case that few free space is available */ 5113 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); 5114 buf->f_bavail = buf->f_bfree - 5115 (ext4_r_blocks_count(es) + resv_blocks); 5116 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks)) 5117 buf->f_bavail = 0; 5118 buf->f_files = le32_to_cpu(es->s_inodes_count); 5119 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 5120 buf->f_namelen = EXT4_NAME_LEN; 5121 fsid = le64_to_cpup((void *)es->s_uuid) ^ 5122 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 5123 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 5124 
	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;

	return 0;
}

/* Helper function for writing quotas on sync - we need to start transaction
 * before quota file is locked for write. Otherwise there are possible
 * deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()              write_dquot()
 *   dquot_initialize()                       down(dqio_mutex)
 *     down(dqio_mutex)                         jbd2_journal_start()
 *
 */

#ifdef CONFIG_QUOTA

static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}

static int ext4_write_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;
	struct inode *inode;

	inode = dquot_to_inode(dquot);
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static int ext4_acquire_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_acquire(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static int ext4_release_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle)) {
		/* Release dquot anyway to avoid endless cycle in dqput() */
		dquot_release(dquot);
		return PTR_ERR(handle);
	}
	ret = dquot_release(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
	struct super_block *sb = dquot->dq_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* Are we journaling quotas? */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
	    sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		dquot_mark_dquot_dirty(dquot);
		return ext4_write_dquot(dquot);
	} else {
		return dquot_mark_dquot_dirty(dquot);
	}
}

static int ext4_write_info(struct super_block *sb, int type)
{
	int ret, err;
	handle_t *handle;

	/* Data block + inode block */
	handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit_info(sb, type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

/*
 * Turn on quotas during mount time - we need to find
 * the quota file and such...
 */
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
	return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
					EXT4_SB(sb)->s_jquota_fmt, type);
}

/*
 * Standard function to be called on quota_on
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem?
*/ 5252 if (path->dentry->d_sb != sb) 5253 return -EXDEV; 5254 /* Journaling quota? */ 5255 if (EXT4_SB(sb)->s_qf_names[type]) { 5256 /* Quotafile not in fs root? */ 5257 if (path->dentry->d_parent != sb->s_root) 5258 ext4_msg(sb, KERN_WARNING, 5259 "Quota file not on filesystem root. " 5260 "Journaled quota will not work"); 5261 } 5262 5263 /* 5264 * When we journal data on quota file, we have to flush journal to see 5265 * all updates to the file when we bypass pagecache... 5266 */ 5267 if (EXT4_SB(sb)->s_journal && 5268 ext4_should_journal_data(d_inode(path->dentry))) { 5269 /* 5270 * We don't need to lock updates but journal_flush() could 5271 * otherwise be livelocked... 5272 */ 5273 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 5274 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 5275 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 5276 if (err) 5277 return err; 5278 } 5279 5280 return dquot_quota_on(sb, type, format_id, path); 5281 } 5282 5283 static int ext4_quota_enable(struct super_block *sb, int type, int format_id, 5284 unsigned int flags) 5285 { 5286 int err; 5287 struct inode *qf_inode; 5288 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5289 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5290 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5291 }; 5292 5293 BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); 5294 5295 if (!qf_inums[type]) 5296 return -EPERM; 5297 5298 qf_inode = ext4_iget(sb, qf_inums[type]); 5299 if (IS_ERR(qf_inode)) { 5300 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); 5301 return PTR_ERR(qf_inode); 5302 } 5303 5304 /* Don't account quota for quota files to avoid recursion */ 5305 qf_inode->i_flags |= S_NOQUOTA; 5306 err = dquot_enable(qf_inode, type, format_id, flags); 5307 iput(qf_inode); 5308 5309 return err; 5310 } 5311 5312 /* Enable usage tracking for all quota types. */ 5313 static int ext4_enable_quotas(struct super_block *sb) 5314 { 5315 int type, err = 0; 5316 unsigned long qf_inums[EXT4_MAXQUOTAS] = { 5317 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5318 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5319 }; 5320 5321 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; 5322 for (type = 0; type < EXT4_MAXQUOTAS; type++) { 5323 if (qf_inums[type]) { 5324 err = ext4_quota_enable(sb, type, QFMT_VFS_V1, 5325 DQUOT_USAGE_ENABLED); 5326 if (err) { 5327 ext4_warning(sb, 5328 "Failed to enable quota tracking " 5329 "(type=%d, err=%d). Please run " 5330 "e2fsck to fix.", type, err); 5331 return err; 5332 } 5333 } 5334 } 5335 return 0; 5336 } 5337 5338 static int ext4_quota_off(struct super_block *sb, int type) 5339 { 5340 struct inode *inode = sb_dqopt(sb)->files[type]; 5341 handle_t *handle; 5342 5343 /* Force all delayed allocation blocks to be allocated. 5344 * Caller already holds s_umount sem */ 5345 if (test_opt(sb, DELALLOC)) 5346 sync_filesystem(sb); 5347 5348 if (!inode) 5349 goto out; 5350 5351 /* Update modification times of quota files when userspace can 5352 * start looking at them */ 5353 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); 5354 if (IS_ERR(handle)) 5355 goto out; 5356 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 5357 ext4_mark_inode_dirty(handle, inode); 5358 ext4_journal_stop(handle); 5359 5360 out: 5361 return dquot_quota_off(sb, type); 5362 } 5363 5364 /* Read data from quotafile - avoid pagecache and such because we cannot afford 5365 * acquiring the locks... 
As quota files are never truncated and quota code
 * itself serializes the operations (and no one else should touch the files)
 * we don't have to be afraid of races */
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int offset = off & (sb->s_blocksize - 1);
	int tocopy;
	size_t toread;
	struct buffer_head *bh;
	loff_t i_size = i_size_read(inode);

	if (off > i_size)
		return 0;
	if (off+len > i_size)
		len = i_size-off;
	toread = len;
	while (toread > 0) {
		tocopy = sb->s_blocksize - offset < toread ?
				sb->s_blocksize - offset : toread;
		bh = ext4_bread(NULL, inode, blk, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
		if (!bh)	/* A hole? */
			memset(data, 0, tocopy);
		else
			memcpy(data, bh->b_data+offset, tocopy);
		brelse(bh);
		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blk++;
	}
	return len;
}

/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err, offset = off & (sb->s_blocksize - 1);
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	if (EXT4_SB(sb)->s_journal && !handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account for only one data block in transaction credits,
	 * a write can never be allowed to cross a block boundary.
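	 * (Callers such as ext4_write_dquot() start the handle with
	 * EXT4_QUOTA_TRANS_BLOCKS credits, which budget for a single
	 * quota-file data block - hence the -EIO below for writes that
	 * would span two blocks.)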
/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err, offset = off & (sb->s_blocksize - 1);
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	if (EXT4_SB(sb)->s_journal && !handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	bh = ext4_bread(handle, inode, blk, 1);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	if (!bh)
		goto out;
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, bh);
	if (err) {
		brelse(bh);
		return err;
	}
	lock_buffer(bh);
	memcpy(bh->b_data + offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		ext4_mark_inode_dirty(handle, inode);
	}
	return len;
}

#endif

static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}

#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
		return 0;
	return 1;
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif

#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext3(void)
{
	int err = register_filesystem(&ext3_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
		return 0;
	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
		return 0;
	if (sb->s_flags & MS_RDONLY)
		return 1;
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
		return 0;
	return 1;
}
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
#endif
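/*
 * The register_as_ext2()/register_as_ext3() helpers above allow this
 * driver to claim the "ext2" and "ext3" filesystem names when the
 * native drivers are not built and CONFIG_EXT4_USE_FOR_EXT23 is set.
 * The matching ext2_feature_set_ok()/ext3_feature_set_ok() checks then
 * reject any superblock the emulated driver could not have handled:
 * ext3 additionally requires a journal, and a read-write mount must not
 * see unsupported RO_COMPAT features.
 */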
static struct file_system_type ext4_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext4",
	.mount		= ext4_mount,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext4");

static int __init ext4_init_feat_adverts(void)
{
	struct ext4_features *ef;
	int ret = -ENOMEM;

	ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
	if (!ef)
		goto out;

	ef->f_kobj.kset = ext4_kset;
	init_completion(&ef->f_kobj_unregister);
	ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
				   "features");
	if (ret) {
		kfree(ef);
		goto out;
	}

	ext4_feat = ef;
	ret = 0;
out:
	return ret;
}

static void ext4_exit_feat_adverts(void)
{
	kobject_put(&ext4_feat->f_kobj);
	wait_for_completion(&ext4_feat->f_kobj_unregister);
	kfree(ext4_feat);
}

/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];

static int __init ext4_init_fs(void)
{
	int i, err;

	ext4_li_info = NULL;
	mutex_init(&ext4_li_mtx);

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
		mutex_init(&ext4__aio_mutex[i]);
		init_waitqueue_head(&ext4__ioend_wq[i]);
	}

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pageio();
	if (err)
		goto out7;

	err = ext4_init_system_zone();
	if (err)
		goto out6;
	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
	if (!ext4_kset) {
		err = -ENOMEM;
		goto out5;
	}
	ext4_proc_root = proc_mkdir("fs/ext4", NULL);

	err = ext4_init_feat_adverts();
	if (err)
		goto out4;

	err = ext4_init_mballoc();
	if (err)
		goto out2;
	else
		ext4_mballoc_ready = 1;
	err = init_inodecache();
	if (err)
		goto out1;
	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	destroy_inodecache();
out1:
	ext4_mballoc_ready = 0;
	ext4_exit_mballoc();
out2:
	ext4_exit_feat_adverts();
out4:
	if (ext4_proc_root)
		remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
out5:
	ext4_exit_system_zone();
out6:
	ext4_exit_pageio();
out7:
	ext4_exit_es();

	return err;
}

static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	destroy_inodecache();
	ext4_exit_mballoc();
	ext4_exit_feat_adverts();
	remove_proc_entry("fs/ext4", NULL);
	kset_unregister(ext4_kset);
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_es();
}

MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)