1 /* 2 * super.c - NILFS module and super block management. 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Ryusuke Konishi <ryusuke@osrg.net> 21 */ 22 /* 23 * linux/fs/ext2/super.c 24 * 25 * Copyright (C) 1992, 1993, 1994, 1995 26 * Remy Card (card@masi.ibp.fr) 27 * Laboratoire MASI - Institut Blaise Pascal 28 * Universite Pierre et Marie Curie (Paris VI) 29 * 30 * from 31 * 32 * linux/fs/minix/inode.c 33 * 34 * Copyright (C) 1991, 1992 Linus Torvalds 35 * 36 * Big-endian to little-endian byte-swapping/bitmaps by 37 * David S. Miller (davem@caip.rutgers.edu), 1995 38 */ 39 40 #include <linux/module.h> 41 #include <linux/string.h> 42 #include <linux/slab.h> 43 #include <linux/init.h> 44 #include <linux/blkdev.h> 45 #include <linux/parser.h> 46 #include <linux/random.h> 47 #include <linux/crc32.h> 48 #include <linux/smp_lock.h> 49 #include <linux/vfs.h> 50 #include <linux/writeback.h> 51 #include <linux/kobject.h> 52 #include <linux/exportfs.h> 53 #include "nilfs.h" 54 #include "mdt.h" 55 #include "alloc.h" 56 #include "page.h" 57 #include "cpfile.h" 58 #include "ifile.h" 59 #include "dat.h" 60 #include "segment.h" 61 #include "segbuf.h" 62 63 MODULE_AUTHOR("NTT Corp."); 64 MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " 65 "(NILFS)"); 66 MODULE_LICENSE("GPL"); 67 68 static int nilfs_remount(struct super_block *sb, int *flags, char *data); 69 static int test_exclusive_mount(struct file_system_type *fs_type, 70 struct block_device *bdev, int flags); 71 72 /** 73 * nilfs_error() - report failure condition on a filesystem 74 * 75 * nilfs_error() sets an ERROR_FS flag on the superblock as well as 76 * reporting an error message. It should be called when NILFS detects 77 * incoherences or defects of meta data on disk. As for sustainable 78 * errors such as a single-shot I/O error, nilfs_warning() or the printk() 79 * function should be used instead. 80 * 81 * The segment constructor must not call this function because it can 82 * kill itself. 83 */ 84 void nilfs_error(struct super_block *sb, const char *function, 85 const char *fmt, ...) 86 { 87 struct nilfs_sb_info *sbi = NILFS_SB(sb); 88 va_list args; 89 90 va_start(args, fmt); 91 printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); 92 vprintk(fmt, args); 93 printk("\n"); 94 va_end(args); 95 96 if (!(sb->s_flags & MS_RDONLY)) { 97 struct the_nilfs *nilfs = sbi->s_nilfs; 98 99 if (!nilfs_test_opt(sbi, ERRORS_CONT)) 100 nilfs_detach_segment_constructor(sbi); 101 102 down_write(&nilfs->ns_sem); 103 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { 104 nilfs->ns_mount_state |= NILFS_ERROR_FS; 105 nilfs->ns_sbp[0]->s_state |= 106 cpu_to_le16(NILFS_ERROR_FS); 107 nilfs_commit_super(sbi, 1); 108 } 109 up_write(&nilfs->ns_sem); 110 111 if (nilfs_test_opt(sbi, ERRORS_RO)) { 112 printk(KERN_CRIT "Remounting filesystem read-only\n"); 113 sb->s_flags |= MS_RDONLY; 114 } 115 } 116 117 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 118 panic("NILFS (device %s): panic forced after error\n", 119 sb->s_id); 120 } 121 122 void nilfs_warning(struct super_block *sb, const char *function, 123 const char *fmt, ...) 124 { 125 va_list args; 126 127 va_start(args, fmt); 128 printk(KERN_WARNING "NILFS warning (device %s): %s: ", 129 sb->s_id, function); 130 vprintk(fmt, args); 131 printk("\n"); 132 va_end(args); 133 } 134 135 static struct kmem_cache *nilfs_inode_cachep; 136 137 struct inode *nilfs_alloc_inode(struct super_block *sb) 138 { 139 struct nilfs_inode_info *ii; 140 141 ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS); 142 if (!ii) 143 return NULL; 144 ii->i_bh = NULL; 145 ii->i_state = 0; 146 ii->vfs_inode.i_version = 1; 147 nilfs_btnode_cache_init(&ii->i_btnode_cache); 148 return &ii->vfs_inode; 149 } 150 151 void nilfs_destroy_inode(struct inode *inode) 152 { 153 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 154 } 155 156 static void init_once(void *obj) 157 { 158 struct nilfs_inode_info *ii = obj; 159 160 INIT_LIST_HEAD(&ii->i_dirty); 161 #ifdef CONFIG_NILFS_XATTR 162 init_rwsem(&ii->xattr_sem); 163 #endif 164 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 165 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; 166 inode_init_once(&ii->vfs_inode); 167 } 168 169 static int nilfs_init_inode_cache(void) 170 { 171 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", 172 sizeof(struct nilfs_inode_info), 173 0, SLAB_RECLAIM_ACCOUNT, 174 init_once); 175 176 return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0; 177 } 178 179 static inline void nilfs_destroy_inode_cache(void) 180 { 181 kmem_cache_destroy(nilfs_inode_cachep); 182 } 183 184 static void nilfs_clear_inode(struct inode *inode) 185 { 186 struct nilfs_inode_info *ii = NILFS_I(inode); 187 188 #ifdef CONFIG_NILFS_POSIX_ACL 189 if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { 190 posix_acl_release(ii->i_acl); 191 ii->i_acl = NILFS_ACL_NOT_CACHED; 192 } 193 if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { 194 posix_acl_release(ii->i_default_acl); 195 ii->i_default_acl = NILFS_ACL_NOT_CACHED; 196 } 197 #endif 198 /* 199 * Free resources allocated in nilfs_read_inode(), here. 200 */ 201 BUG_ON(!list_empty(&ii->i_dirty)); 202 brelse(ii->i_bh); 203 ii->i_bh = NULL; 204 205 if (test_bit(NILFS_I_BMAP, &ii->i_state)) 206 nilfs_bmap_clear(ii->i_bmap); 207 208 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 209 } 210 211 static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) 212 { 213 struct the_nilfs *nilfs = sbi->s_nilfs; 214 int err; 215 int barrier_done = 0; 216 217 if (nilfs_test_opt(sbi, BARRIER)) { 218 set_buffer_ordered(nilfs->ns_sbh[0]); 219 barrier_done = 1; 220 } 221 retry: 222 set_buffer_dirty(nilfs->ns_sbh[0]); 223 err = sync_dirty_buffer(nilfs->ns_sbh[0]); 224 if (err == -EOPNOTSUPP && barrier_done) { 225 nilfs_warning(sbi->s_super, __func__, 226 "barrier-based sync failed. " 227 "disabling barriers\n"); 228 nilfs_clear_opt(sbi, BARRIER); 229 barrier_done = 0; 230 clear_buffer_ordered(nilfs->ns_sbh[0]); 231 goto retry; 232 } 233 if (unlikely(err)) { 234 printk(KERN_ERR 235 "NILFS: unable to write superblock (err=%d)\n", err); 236 if (err == -EIO && nilfs->ns_sbh[1]) { 237 nilfs_fall_back_super_block(nilfs); 238 goto retry; 239 } 240 } else { 241 struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; 242 243 /* 244 * The latest segment becomes trailable from the position 245 * written in superblock. 246 */ 247 clear_nilfs_discontinued(nilfs); 248 249 /* update GC protection for recent segments */ 250 if (nilfs->ns_sbh[1]) { 251 sbp = NULL; 252 if (dupsb) { 253 set_buffer_dirty(nilfs->ns_sbh[1]); 254 if (!sync_dirty_buffer(nilfs->ns_sbh[1])) 255 sbp = nilfs->ns_sbp[1]; 256 } 257 } 258 if (sbp) { 259 spin_lock(&nilfs->ns_last_segment_lock); 260 nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); 261 spin_unlock(&nilfs->ns_last_segment_lock); 262 } 263 } 264 265 return err; 266 } 267 268 int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) 269 { 270 struct the_nilfs *nilfs = sbi->s_nilfs; 271 struct nilfs_super_block **sbp = nilfs->ns_sbp; 272 sector_t nfreeblocks; 273 time_t t; 274 int err; 275 276 /* nilfs->sem must be locked by the caller. */ 277 if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { 278 if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) 279 nilfs_swap_super_block(nilfs); 280 else { 281 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 282 sbi->s_super->s_id); 283 return -EIO; 284 } 285 } 286 err = nilfs_count_free_blocks(nilfs, &nfreeblocks); 287 if (unlikely(err)) { 288 printk(KERN_ERR "NILFS: failed to count free blocks\n"); 289 return err; 290 } 291 spin_lock(&nilfs->ns_last_segment_lock); 292 sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); 293 sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); 294 sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); 295 spin_unlock(&nilfs->ns_last_segment_lock); 296 297 t = get_seconds(); 298 nilfs->ns_sbwtime[0] = t; 299 sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); 300 sbp[0]->s_wtime = cpu_to_le64(t); 301 sbp[0]->s_sum = 0; 302 sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, 303 (unsigned char *)sbp[0], 304 nilfs->ns_sbsize)); 305 if (dupsb && sbp[1]) { 306 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 307 nilfs->ns_sbwtime[1] = t; 308 } 309 sbi->s_super->s_dirt = 0; 310 return nilfs_sync_super(sbi, dupsb); 311 } 312 313 static void nilfs_put_super(struct super_block *sb) 314 { 315 struct nilfs_sb_info *sbi = NILFS_SB(sb); 316 struct the_nilfs *nilfs = sbi->s_nilfs; 317 318 nilfs_detach_segment_constructor(sbi); 319 320 if (!(sb->s_flags & MS_RDONLY)) { 321 down_write(&nilfs->ns_sem); 322 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); 323 nilfs_commit_super(sbi, 1); 324 up_write(&nilfs->ns_sem); 325 } 326 327 nilfs_detach_checkpoint(sbi); 328 put_nilfs(sbi->s_nilfs); 329 sbi->s_super = NULL; 330 sb->s_fs_info = NULL; 331 kfree(sbi); 332 } 333 334 /** 335 * nilfs_write_super - write super block(s) of NILFS 336 * @sb: super_block 337 * 338 * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and 339 * clears s_dirt. This function is called in the section protected by 340 * lock_super(). 341 * 342 * The s_dirt flag is managed by each filesystem and we protect it by ns_sem 343 * of the struct the_nilfs. Lock order must be as follows: 344 * 345 * 1. lock_super() 346 * 2. down_write(&nilfs->ns_sem) 347 * 348 * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer 349 * of the super block (nilfs->ns_sbp[]). 350 * 351 * In most cases, VFS functions call lock_super() before calling these 352 * methods. So we must be careful not to bring on deadlocks when using 353 * lock_super(); see generic_shutdown_super(), write_super(), and so on. 354 * 355 * Note that order of lock_kernel() and lock_super() depends on contexts 356 * of VFS. We should also note that lock_kernel() can be used in its 357 * protective section and only the outermost one has an effect. 358 */ 359 static void nilfs_write_super(struct super_block *sb) 360 { 361 struct nilfs_sb_info *sbi = NILFS_SB(sb); 362 struct the_nilfs *nilfs = sbi->s_nilfs; 363 364 down_write(&nilfs->ns_sem); 365 if (!(sb->s_flags & MS_RDONLY)) { 366 struct nilfs_super_block **sbp = nilfs->ns_sbp; 367 u64 t = get_seconds(); 368 int dupsb; 369 370 if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && 371 t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { 372 up_write(&nilfs->ns_sem); 373 return; 374 } 375 dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; 376 nilfs_commit_super(sbi, dupsb); 377 } 378 sb->s_dirt = 0; 379 up_write(&nilfs->ns_sem); 380 } 381 382 static int nilfs_sync_fs(struct super_block *sb, int wait) 383 { 384 int err = 0; 385 386 /* This function is called when super block should be written back */ 387 if (wait) 388 err = nilfs_construct_segment(sb); 389 return err; 390 } 391 392 int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) 393 { 394 struct the_nilfs *nilfs = sbi->s_nilfs; 395 struct nilfs_checkpoint *raw_cp; 396 struct buffer_head *bh_cp; 397 int err; 398 399 down_write(&nilfs->ns_sem); 400 list_add(&sbi->s_list, &nilfs->ns_supers); 401 up_write(&nilfs->ns_sem); 402 403 sbi->s_ifile = nilfs_mdt_new( 404 nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); 405 if (!sbi->s_ifile) 406 return -ENOMEM; 407 408 err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); 409 if (unlikely(err)) 410 goto failed; 411 412 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, 413 &bh_cp); 414 if (unlikely(err)) { 415 if (err == -ENOENT || err == -EINVAL) { 416 printk(KERN_ERR 417 "NILFS: Invalid checkpoint " 418 "(checkpoint number=%llu)\n", 419 (unsigned long long)cno); 420 err = -EINVAL; 421 } 422 goto failed; 423 } 424 err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); 425 if (unlikely(err)) 426 goto failed_bh; 427 atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); 428 atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); 429 430 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); 431 return 0; 432 433 failed_bh: 434 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); 435 failed: 436 nilfs_mdt_destroy(sbi->s_ifile); 437 sbi->s_ifile = NULL; 438 439 down_write(&nilfs->ns_sem); 440 list_del_init(&sbi->s_list); 441 up_write(&nilfs->ns_sem); 442 443 return err; 444 } 445 446 void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) 447 { 448 struct the_nilfs *nilfs = sbi->s_nilfs; 449 450 nilfs_mdt_clear(sbi->s_ifile); 451 nilfs_mdt_destroy(sbi->s_ifile); 452 sbi->s_ifile = NULL; 453 down_write(&nilfs->ns_sem); 454 list_del_init(&sbi->s_list); 455 up_write(&nilfs->ns_sem); 456 } 457 458 static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) 459 { 460 struct the_nilfs *nilfs = sbi->s_nilfs; 461 int err = 0; 462 463 down_write(&nilfs->ns_sem); 464 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { 465 nilfs->ns_mount_state |= NILFS_VALID_FS; 466 err = nilfs_commit_super(sbi, 1); 467 if (likely(!err)) 468 printk(KERN_INFO "NILFS: recovery complete.\n"); 469 } 470 up_write(&nilfs->ns_sem); 471 return err; 472 } 473 474 static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) 475 { 476 struct super_block *sb = dentry->d_sb; 477 struct nilfs_sb_info *sbi = NILFS_SB(sb); 478 struct the_nilfs *nilfs = sbi->s_nilfs; 479 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 480 unsigned long long blocks; 481 unsigned long overhead; 482 unsigned long nrsvblocks; 483 sector_t nfreeblocks; 484 int err; 485 486 /* 487 * Compute all of the segment blocks 488 * 489 * The blocks before first segment and after last segment 490 * are excluded. 491 */ 492 blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments 493 - nilfs->ns_first_data_block; 494 nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; 495 496 /* 497 * Compute the overhead 498 * 499 * When distributing meta data blocks outside semgent structure, 500 * We must count them as the overhead. 501 */ 502 overhead = 0; 503 504 err = nilfs_count_free_blocks(nilfs, &nfreeblocks); 505 if (unlikely(err)) 506 return err; 507 508 buf->f_type = NILFS_SUPER_MAGIC; 509 buf->f_bsize = sb->s_blocksize; 510 buf->f_blocks = blocks - overhead; 511 buf->f_bfree = nfreeblocks; 512 buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? 513 (buf->f_bfree - nrsvblocks) : 0; 514 buf->f_files = atomic_read(&sbi->s_inodes_count); 515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ 516 buf->f_namelen = NILFS_NAME_LEN; 517 buf->f_fsid.val[0] = (u32)id; 518 buf->f_fsid.val[1] = (u32)(id >> 32); 519 520 return 0; 521 } 522 523 static struct super_operations nilfs_sops = { 524 .alloc_inode = nilfs_alloc_inode, 525 .destroy_inode = nilfs_destroy_inode, 526 .dirty_inode = nilfs_dirty_inode, 527 /* .write_inode = nilfs_write_inode, */ 528 /* .put_inode = nilfs_put_inode, */ 529 /* .drop_inode = nilfs_drop_inode, */ 530 .delete_inode = nilfs_delete_inode, 531 .put_super = nilfs_put_super, 532 .write_super = nilfs_write_super, 533 .sync_fs = nilfs_sync_fs, 534 /* .write_super_lockfs */ 535 /* .unlockfs */ 536 .statfs = nilfs_statfs, 537 .remount_fs = nilfs_remount, 538 .clear_inode = nilfs_clear_inode, 539 /* .umount_begin */ 540 /* .show_options */ 541 }; 542 543 static struct inode * 544 nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) 545 { 546 struct inode *inode; 547 548 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && 549 ino != NILFS_SKETCH_INO) 550 return ERR_PTR(-ESTALE); 551 552 inode = nilfs_iget(sb, ino); 553 if (IS_ERR(inode)) 554 return ERR_CAST(inode); 555 if (generation && inode->i_generation != generation) { 556 iput(inode); 557 return ERR_PTR(-ESTALE); 558 } 559 560 return inode; 561 } 562 563 static struct dentry * 564 nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, 565 int fh_type) 566 { 567 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 568 nilfs_nfs_get_inode); 569 } 570 571 static struct dentry * 572 nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, 573 int fh_type) 574 { 575 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 576 nilfs_nfs_get_inode); 577 } 578 579 static struct export_operations nilfs_export_ops = { 580 .fh_to_dentry = nilfs_fh_to_dentry, 581 .fh_to_parent = nilfs_fh_to_parent, 582 .get_parent = nilfs_get_parent, 583 }; 584 585 enum { 586 Opt_err_cont, Opt_err_panic, Opt_err_ro, 587 Opt_barrier, Opt_snapshot, Opt_order, 588 Opt_err, 589 }; 590 591 static match_table_t tokens = { 592 {Opt_err_cont, "errors=continue"}, 593 {Opt_err_panic, "errors=panic"}, 594 {Opt_err_ro, "errors=remount-ro"}, 595 {Opt_barrier, "barrier=%s"}, 596 {Opt_snapshot, "cp=%u"}, 597 {Opt_order, "order=%s"}, 598 {Opt_err, NULL} 599 }; 600 601 static int match_bool(substring_t *s, int *result) 602 { 603 int len = s->to - s->from; 604 605 if (strncmp(s->from, "on", len) == 0) 606 *result = 1; 607 else if (strncmp(s->from, "off", len) == 0) 608 *result = 0; 609 else 610 return 1; 611 return 0; 612 } 613 614 static int parse_options(char *options, struct super_block *sb) 615 { 616 struct nilfs_sb_info *sbi = NILFS_SB(sb); 617 char *p; 618 substring_t args[MAX_OPT_ARGS]; 619 int option; 620 621 if (!options) 622 return 1; 623 624 while ((p = strsep(&options, ",")) != NULL) { 625 int token; 626 if (!*p) 627 continue; 628 629 token = match_token(p, tokens, args); 630 switch (token) { 631 case Opt_barrier: 632 if (match_bool(&args[0], &option)) 633 return 0; 634 if (option) 635 nilfs_set_opt(sbi, BARRIER); 636 else 637 nilfs_clear_opt(sbi, BARRIER); 638 break; 639 case Opt_order: 640 if (strcmp(args[0].from, "relaxed") == 0) 641 /* Ordered data semantics */ 642 nilfs_clear_opt(sbi, STRICT_ORDER); 643 else if (strcmp(args[0].from, "strict") == 0) 644 /* Strict in-order semantics */ 645 nilfs_set_opt(sbi, STRICT_ORDER); 646 else 647 return 0; 648 break; 649 case Opt_err_panic: 650 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); 651 break; 652 case Opt_err_ro: 653 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); 654 break; 655 case Opt_err_cont: 656 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); 657 break; 658 case Opt_snapshot: 659 if (match_int(&args[0], &option) || option <= 0) 660 return 0; 661 if (!(sb->s_flags & MS_RDONLY)) 662 return 0; 663 sbi->s_snapshot_cno = option; 664 nilfs_set_opt(sbi, SNAPSHOT); 665 break; 666 default: 667 printk(KERN_ERR 668 "NILFS: Unrecognized mount option \"%s\"\n", p); 669 return 0; 670 } 671 } 672 return 1; 673 } 674 675 static inline void 676 nilfs_set_default_options(struct nilfs_sb_info *sbi, 677 struct nilfs_super_block *sbp) 678 { 679 sbi->s_mount_opt = 680 NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; 681 } 682 683 static int nilfs_setup_super(struct nilfs_sb_info *sbi) 684 { 685 struct the_nilfs *nilfs = sbi->s_nilfs; 686 struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; 687 int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); 688 int mnt_count = le16_to_cpu(sbp->s_mnt_count); 689 690 /* nilfs->sem must be locked by the caller. */ 691 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { 692 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); 693 } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { 694 printk(KERN_WARNING 695 "NILFS warning: mounting fs with errors\n"); 696 #if 0 697 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { 698 printk(KERN_WARNING 699 "NILFS warning: maximal mount count reached\n"); 700 #endif 701 } 702 if (!max_mnt_count) 703 sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); 704 705 sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); 706 sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); 707 sbp->s_mtime = cpu_to_le64(get_seconds()); 708 return nilfs_commit_super(sbi, 1); 709 } 710 711 struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, 712 u64 pos, int blocksize, 713 struct buffer_head **pbh) 714 { 715 unsigned long long sb_index = pos; 716 unsigned long offset; 717 718 offset = do_div(sb_index, blocksize); 719 *pbh = sb_bread(sb, sb_index); 720 if (!*pbh) 721 return NULL; 722 return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); 723 } 724 725 int nilfs_store_magic_and_option(struct super_block *sb, 726 struct nilfs_super_block *sbp, 727 char *data) 728 { 729 struct nilfs_sb_info *sbi = NILFS_SB(sb); 730 731 sb->s_magic = le16_to_cpu(sbp->s_magic); 732 733 /* FS independent flags */ 734 #ifdef NILFS_ATIME_DISABLE 735 sb->s_flags |= MS_NOATIME; 736 #endif 737 738 nilfs_set_default_options(sbi, sbp); 739 740 sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); 741 sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); 742 sbi->s_interval = le32_to_cpu(sbp->s_c_interval); 743 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); 744 745 return !parse_options(data, sb) ? -EINVAL : 0 ; 746 } 747 748 /** 749 * nilfs_fill_super() - initialize a super block instance 750 * @sb: super_block 751 * @data: mount options 752 * @silent: silent mode flag 753 * @nilfs: the_nilfs struct 754 * 755 * This function is called exclusively by bd_mount_mutex. 756 * So, the recovery process is protected from other simultaneous mounts. 757 */ 758 static int 759 nilfs_fill_super(struct super_block *sb, void *data, int silent, 760 struct the_nilfs *nilfs) 761 { 762 struct nilfs_sb_info *sbi; 763 struct inode *root; 764 __u64 cno; 765 int err; 766 767 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 768 if (!sbi) 769 return -ENOMEM; 770 771 sb->s_fs_info = sbi; 772 773 get_nilfs(nilfs); 774 sbi->s_nilfs = nilfs; 775 sbi->s_super = sb; 776 777 err = init_nilfs(nilfs, sbi, (char *)data); 778 if (err) 779 goto failed_sbi; 780 781 spin_lock_init(&sbi->s_inode_lock); 782 INIT_LIST_HEAD(&sbi->s_dirty_files); 783 INIT_LIST_HEAD(&sbi->s_list); 784 785 /* 786 * Following initialization is overlapped because 787 * nilfs_sb_info structure has been cleared at the beginning. 788 * But we reserve them to keep our interest and make ready 789 * for the future change. 790 */ 791 get_random_bytes(&sbi->s_next_generation, 792 sizeof(sbi->s_next_generation)); 793 spin_lock_init(&sbi->s_next_gen_lock); 794 795 sb->s_op = &nilfs_sops; 796 sb->s_export_op = &nilfs_export_ops; 797 sb->s_root = NULL; 798 sb->s_time_gran = 1; 799 800 if (!nilfs_loaded(nilfs)) { 801 err = load_nilfs(nilfs, sbi); 802 if (err) 803 goto failed_sbi; 804 } 805 cno = nilfs_last_cno(nilfs); 806 807 if (sb->s_flags & MS_RDONLY) { 808 if (nilfs_test_opt(sbi, SNAPSHOT)) { 809 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, 810 sbi->s_snapshot_cno); 811 if (err < 0) 812 goto failed_sbi; 813 if (!err) { 814 printk(KERN_ERR 815 "NILFS: The specified checkpoint is " 816 "not a snapshot " 817 "(checkpoint number=%llu).\n", 818 (unsigned long long)sbi->s_snapshot_cno); 819 err = -EINVAL; 820 goto failed_sbi; 821 } 822 cno = sbi->s_snapshot_cno; 823 } else 824 /* Read-only mount */ 825 sbi->s_snapshot_cno = cno; 826 } 827 828 err = nilfs_attach_checkpoint(sbi, cno); 829 if (err) { 830 printk(KERN_ERR "NILFS: error loading a checkpoint" 831 " (checkpoint number=%llu).\n", (unsigned long long)cno); 832 goto failed_sbi; 833 } 834 835 if (!(sb->s_flags & MS_RDONLY)) { 836 err = nilfs_attach_segment_constructor(sbi); 837 if (err) 838 goto failed_checkpoint; 839 } 840 841 root = nilfs_iget(sb, NILFS_ROOT_INO); 842 if (IS_ERR(root)) { 843 printk(KERN_ERR "NILFS: get root inode failed\n"); 844 err = PTR_ERR(root); 845 goto failed_segctor; 846 } 847 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 848 iput(root); 849 printk(KERN_ERR "NILFS: corrupt root inode.\n"); 850 err = -EINVAL; 851 goto failed_segctor; 852 } 853 sb->s_root = d_alloc_root(root); 854 if (!sb->s_root) { 855 iput(root); 856 printk(KERN_ERR "NILFS: get root dentry failed\n"); 857 err = -ENOMEM; 858 goto failed_segctor; 859 } 860 861 if (!(sb->s_flags & MS_RDONLY)) { 862 down_write(&nilfs->ns_sem); 863 nilfs_setup_super(sbi); 864 up_write(&nilfs->ns_sem); 865 } 866 867 err = nilfs_mark_recovery_complete(sbi); 868 if (unlikely(err)) { 869 printk(KERN_ERR "NILFS: recovery failed.\n"); 870 goto failed_root; 871 } 872 873 return 0; 874 875 failed_root: 876 dput(sb->s_root); 877 sb->s_root = NULL; 878 879 failed_segctor: 880 nilfs_detach_segment_constructor(sbi); 881 882 failed_checkpoint: 883 nilfs_detach_checkpoint(sbi); 884 885 failed_sbi: 886 put_nilfs(nilfs); 887 sb->s_fs_info = NULL; 888 kfree(sbi); 889 return err; 890 } 891 892 static int nilfs_remount(struct super_block *sb, int *flags, char *data) 893 { 894 struct nilfs_sb_info *sbi = NILFS_SB(sb); 895 struct nilfs_super_block *sbp; 896 struct the_nilfs *nilfs = sbi->s_nilfs; 897 unsigned long old_sb_flags; 898 struct nilfs_mount_options old_opts; 899 int err; 900 901 old_sb_flags = sb->s_flags; 902 old_opts.mount_opt = sbi->s_mount_opt; 903 old_opts.snapshot_cno = sbi->s_snapshot_cno; 904 905 if (!parse_options(data, sb)) { 906 err = -EINVAL; 907 goto restore_opts; 908 } 909 sb->s_flags = (sb->s_flags & ~MS_POSIXACL); 910 911 if ((*flags & MS_RDONLY) && 912 sbi->s_snapshot_cno != old_opts.snapshot_cno) { 913 printk(KERN_WARNING "NILFS (device %s): couldn't " 914 "remount to a different snapshot. \n", 915 sb->s_id); 916 err = -EINVAL; 917 goto restore_opts; 918 } 919 920 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 921 goto out; 922 if (*flags & MS_RDONLY) { 923 /* Shutting down the segment constructor */ 924 nilfs_detach_segment_constructor(sbi); 925 sb->s_flags |= MS_RDONLY; 926 927 sbi->s_snapshot_cno = nilfs_last_cno(nilfs); 928 /* nilfs_set_opt(sbi, SNAPSHOT); */ 929 930 /* 931 * Remounting a valid RW partition RDONLY, so set 932 * the RDONLY flag and then mark the partition as valid again. 933 */ 934 down_write(&nilfs->ns_sem); 935 sbp = nilfs->ns_sbp[0]; 936 if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && 937 (nilfs->ns_mount_state & NILFS_VALID_FS)) 938 sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); 939 sbp->s_mtime = cpu_to_le64(get_seconds()); 940 nilfs_commit_super(sbi, 1); 941 up_write(&nilfs->ns_sem); 942 } else { 943 /* 944 * Mounting a RDONLY partition read-write, so reread and 945 * store the current valid flag. (It may have been changed 946 * by fsck since we originally mounted the partition.) 947 */ 948 down(&sb->s_bdev->bd_mount_sem); 949 /* Check existing RW-mount */ 950 if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { 951 printk(KERN_WARNING "NILFS (device %s): couldn't " 952 "remount because a RW-mount exists.\n", 953 sb->s_id); 954 err = -EBUSY; 955 goto rw_remount_failed; 956 } 957 if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { 958 printk(KERN_WARNING "NILFS (device %s): couldn't " 959 "remount because the current RO-mount is not " 960 "the latest one.\n", 961 sb->s_id); 962 err = -EINVAL; 963 goto rw_remount_failed; 964 } 965 sb->s_flags &= ~MS_RDONLY; 966 nilfs_clear_opt(sbi, SNAPSHOT); 967 sbi->s_snapshot_cno = 0; 968 969 err = nilfs_attach_segment_constructor(sbi); 970 if (err) 971 goto rw_remount_failed; 972 973 down_write(&nilfs->ns_sem); 974 nilfs_setup_super(sbi); 975 up_write(&nilfs->ns_sem); 976 977 up(&sb->s_bdev->bd_mount_sem); 978 } 979 out: 980 return 0; 981 982 rw_remount_failed: 983 up(&sb->s_bdev->bd_mount_sem); 984 restore_opts: 985 sb->s_flags = old_sb_flags; 986 sbi->s_mount_opt = old_opts.mount_opt; 987 sbi->s_snapshot_cno = old_opts.snapshot_cno; 988 return err; 989 } 990 991 struct nilfs_super_data { 992 struct block_device *bdev; 993 __u64 cno; 994 int flags; 995 }; 996 997 /** 998 * nilfs_identify - pre-read mount options needed to identify mount instance 999 * @data: mount options 1000 * @sd: nilfs_super_data 1001 */ 1002 static int nilfs_identify(char *data, struct nilfs_super_data *sd) 1003 { 1004 char *p, *options = data; 1005 substring_t args[MAX_OPT_ARGS]; 1006 int option, token; 1007 int ret = 0; 1008 1009 do { 1010 p = strsep(&options, ","); 1011 if (p != NULL && *p) { 1012 token = match_token(p, tokens, args); 1013 if (token == Opt_snapshot) { 1014 if (!(sd->flags & MS_RDONLY)) 1015 ret++; 1016 else { 1017 ret = match_int(&args[0], &option); 1018 if (!ret) { 1019 if (option > 0) 1020 sd->cno = option; 1021 else 1022 ret++; 1023 } 1024 } 1025 } 1026 if (ret) 1027 printk(KERN_ERR 1028 "NILFS: invalid mount option: %s\n", p); 1029 } 1030 if (!options) 1031 break; 1032 BUG_ON(options == data); 1033 *(options - 1) = ','; 1034 } while (!ret); 1035 return ret; 1036 } 1037 1038 static int nilfs_set_bdev_super(struct super_block *s, void *data) 1039 { 1040 struct nilfs_super_data *sd = data; 1041 1042 s->s_bdev = sd->bdev; 1043 s->s_dev = s->s_bdev->bd_dev; 1044 return 0; 1045 } 1046 1047 static int nilfs_test_bdev_super(struct super_block *s, void *data) 1048 { 1049 struct nilfs_super_data *sd = data; 1050 1051 return s->s_bdev == sd->bdev; 1052 } 1053 1054 static int nilfs_test_bdev_super2(struct super_block *s, void *data) 1055 { 1056 struct nilfs_super_data *sd = data; 1057 int ret; 1058 1059 if (s->s_bdev != sd->bdev) 1060 return 0; 1061 1062 if (!((s->s_flags | sd->flags) & MS_RDONLY)) 1063 return 1; /* Reuse an old R/W-mode super_block */ 1064 1065 if (s->s_flags & sd->flags & MS_RDONLY) { 1066 if (down_read_trylock(&s->s_umount)) { 1067 ret = s->s_root && 1068 (sd->cno == NILFS_SB(s)->s_snapshot_cno); 1069 up_read(&s->s_umount); 1070 /* 1071 * This path is locked with sb_lock by sget(). 1072 * So, drop_super() causes deadlock. 1073 */ 1074 return ret; 1075 } 1076 } 1077 return 0; 1078 } 1079 1080 static int 1081 nilfs_get_sb(struct file_system_type *fs_type, int flags, 1082 const char *dev_name, void *data, struct vfsmount *mnt) 1083 { 1084 struct nilfs_super_data sd; 1085 struct super_block *s, *s2; 1086 struct the_nilfs *nilfs = NULL; 1087 int err, need_to_close = 1; 1088 1089 sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); 1090 if (IS_ERR(sd.bdev)) 1091 return PTR_ERR(sd.bdev); 1092 1093 /* 1094 * To get mount instance using sget() vfs-routine, NILFS needs 1095 * much more information than normal filesystems to identify mount 1096 * instance. For snapshot mounts, not only a mount type (ro-mount 1097 * or rw-mount) but also a checkpoint number is required. 1098 * The results are passed in sget() using nilfs_super_data. 1099 */ 1100 sd.cno = 0; 1101 sd.flags = flags; 1102 if (nilfs_identify((char *)data, &sd)) { 1103 err = -EINVAL; 1104 goto failed; 1105 } 1106 1107 /* 1108 * once the super is inserted into the list by sget, s_umount 1109 * will protect the lockfs code from trying to start a snapshot 1110 * while we are mounting 1111 */ 1112 down(&sd.bdev->bd_mount_sem); 1113 if (!sd.cno && 1114 (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { 1115 err = (err < 0) ? : -EBUSY; 1116 goto failed_unlock; 1117 } 1118 1119 /* 1120 * Phase-1: search any existent instance and get the_nilfs 1121 */ 1122 s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); 1123 if (IS_ERR(s)) 1124 goto error_s; 1125 1126 if (!s->s_root) { 1127 err = -ENOMEM; 1128 nilfs = alloc_nilfs(sd.bdev); 1129 if (!nilfs) 1130 goto cancel_new; 1131 } else { 1132 struct nilfs_sb_info *sbi = NILFS_SB(s); 1133 1134 /* 1135 * s_umount protects super_block from unmount process; 1136 * It covers pointers of nilfs_sb_info and the_nilfs. 1137 */ 1138 nilfs = sbi->s_nilfs; 1139 get_nilfs(nilfs); 1140 up_write(&s->s_umount); 1141 1142 /* 1143 * Phase-2: search specified snapshot or R/W mode super_block 1144 */ 1145 if (!sd.cno) 1146 /* trying to get the latest checkpoint. */ 1147 sd.cno = nilfs_last_cno(nilfs); 1148 1149 s2 = sget(fs_type, nilfs_test_bdev_super2, 1150 nilfs_set_bdev_super, &sd); 1151 deactivate_super(s); 1152 /* 1153 * Although deactivate_super() invokes close_bdev_exclusive() at 1154 * kill_block_super(). Here, s is an existent mount; we need 1155 * one more close_bdev_exclusive() call. 1156 */ 1157 s = s2; 1158 if (IS_ERR(s)) 1159 goto error_s; 1160 } 1161 1162 if (!s->s_root) { 1163 char b[BDEVNAME_SIZE]; 1164 1165 s->s_flags = flags; 1166 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); 1167 sb_set_blocksize(s, block_size(sd.bdev)); 1168 1169 err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs); 1170 if (err) 1171 goto cancel_new; 1172 1173 s->s_flags |= MS_ACTIVE; 1174 need_to_close = 0; 1175 } else if (!(s->s_flags & MS_RDONLY)) { 1176 err = -EBUSY; 1177 } 1178 1179 up(&sd.bdev->bd_mount_sem); 1180 put_nilfs(nilfs); 1181 if (need_to_close) 1182 close_bdev_exclusive(sd.bdev, flags); 1183 simple_set_mnt(mnt, s); 1184 return 0; 1185 1186 error_s: 1187 up(&sd.bdev->bd_mount_sem); 1188 if (nilfs) 1189 put_nilfs(nilfs); 1190 close_bdev_exclusive(sd.bdev, flags); 1191 return PTR_ERR(s); 1192 1193 failed_unlock: 1194 up(&sd.bdev->bd_mount_sem); 1195 failed: 1196 close_bdev_exclusive(sd.bdev, flags); 1197 1198 return err; 1199 1200 cancel_new: 1201 /* Abandoning the newly allocated superblock */ 1202 up(&sd.bdev->bd_mount_sem); 1203 if (nilfs) 1204 put_nilfs(nilfs); 1205 up_write(&s->s_umount); 1206 deactivate_super(s); 1207 /* 1208 * deactivate_super() invokes close_bdev_exclusive(). 1209 * We must finish all post-cleaning before this call; 1210 * put_nilfs() and unlocking bd_mount_sem need the block device. 1211 */ 1212 return err; 1213 } 1214 1215 static int nilfs_test_bdev_super3(struct super_block *s, void *data) 1216 { 1217 struct nilfs_super_data *sd = data; 1218 int ret; 1219 1220 if (s->s_bdev != sd->bdev) 1221 return 0; 1222 if (down_read_trylock(&s->s_umount)) { 1223 ret = (s->s_flags & MS_RDONLY) && s->s_root && 1224 nilfs_test_opt(NILFS_SB(s), SNAPSHOT); 1225 up_read(&s->s_umount); 1226 if (ret) 1227 return 0; /* ignore snapshot mounts */ 1228 } 1229 return !((sd->flags ^ s->s_flags) & MS_RDONLY); 1230 } 1231 1232 static int __false_bdev_super(struct super_block *s, void *data) 1233 { 1234 #if 0 /* XXX: workaround for lock debug. This is not good idea */ 1235 up_write(&s->s_umount); 1236 #endif 1237 return -EFAULT; 1238 } 1239 1240 /** 1241 * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. 1242 * fs_type: filesystem type 1243 * bdev: block device 1244 * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) 1245 * res: pointer to an integer to store result 1246 * 1247 * This function must be called within a section protected by bd_mount_mutex. 1248 */ 1249 static int test_exclusive_mount(struct file_system_type *fs_type, 1250 struct block_device *bdev, int flags) 1251 { 1252 struct super_block *s; 1253 struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; 1254 1255 s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); 1256 if (IS_ERR(s)) { 1257 if (PTR_ERR(s) != -EFAULT) 1258 return PTR_ERR(s); 1259 return 0; /* Not found */ 1260 } 1261 up_write(&s->s_umount); 1262 deactivate_super(s); 1263 return 1; /* Found */ 1264 } 1265 1266 struct file_system_type nilfs_fs_type = { 1267 .owner = THIS_MODULE, 1268 .name = "nilfs2", 1269 .get_sb = nilfs_get_sb, 1270 .kill_sb = kill_block_super, 1271 .fs_flags = FS_REQUIRES_DEV, 1272 }; 1273 1274 static int __init init_nilfs_fs(void) 1275 { 1276 int err; 1277 1278 err = nilfs_init_inode_cache(); 1279 if (err) 1280 goto failed; 1281 1282 err = nilfs_init_transaction_cache(); 1283 if (err) 1284 goto failed_inode_cache; 1285 1286 err = nilfs_init_segbuf_cache(); 1287 if (err) 1288 goto failed_transaction_cache; 1289 1290 err = nilfs_btree_path_cache_init(); 1291 if (err) 1292 goto failed_segbuf_cache; 1293 1294 err = register_filesystem(&nilfs_fs_type); 1295 if (err) 1296 goto failed_btree_path_cache; 1297 1298 return 0; 1299 1300 failed_btree_path_cache: 1301 nilfs_btree_path_cache_destroy(); 1302 1303 failed_segbuf_cache: 1304 nilfs_destroy_segbuf_cache(); 1305 1306 failed_transaction_cache: 1307 nilfs_destroy_transaction_cache(); 1308 1309 failed_inode_cache: 1310 nilfs_destroy_inode_cache(); 1311 1312 failed: 1313 return err; 1314 } 1315 1316 static void __exit exit_nilfs_fs(void) 1317 { 1318 nilfs_destroy_segbuf_cache(); 1319 nilfs_destroy_transaction_cache(); 1320 nilfs_destroy_inode_cache(); 1321 nilfs_btree_path_cache_destroy(); 1322 unregister_filesystem(&nilfs_fs_type); 1323 } 1324 1325 module_init(init_nilfs_fs) 1326 module_exit(exit_nilfs_fs) 1327