1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * super.c 5 * 6 * load/unload driver, mount/dismount volumes 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/module.h> 27 #include <linux/fs.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/highmem.h> 31 #include <linux/utsname.h> 32 #include <linux/init.h> 33 #include <linux/random.h> 34 #include <linux/statfs.h> 35 #include <linux/moduleparam.h> 36 #include <linux/blkdev.h> 37 #include <linux/socket.h> 38 #include <linux/inet.h> 39 #include <linux/parser.h> 40 #include <linux/crc32.h> 41 #include <linux/debugfs.h> 42 #include <linux/mount.h> 43 44 #include <cluster/nodemanager.h> 45 46 #define MLOG_MASK_PREFIX ML_SUPER 47 #include <cluster/masklog.h> 48 49 #include "ocfs2.h" 50 51 /* this should be the only file to include a version 1 header */ 52 #include "ocfs1_fs_compat.h" 53 54 #include "alloc.h" 55 #include "dlmglue.h" 56 #include "export.h" 57 #include "extent_map.h" 58 #include "heartbeat.h" 59 #include "inode.h" 60 #include "journal.h" 61 #include "localalloc.h" 62 #include "namei.h" 63 #include "slot_map.h" 64 #include "super.h" 65 #include "sysfile.h" 66 #include "uptodate.h" 67 #include "ver.h" 68 69 #include "buffer_head_io.h" 70 71 static struct kmem_cache *ocfs2_inode_cachep = NULL; 72 73 /* OCFS2 needs to schedule several differnt types of work which 74 * require cluster locking, disk I/O, recovery waits, etc. Since these 75 * types of work tend to be heavy we avoid using the kernel events 76 * workqueue and schedule on our own. */ 77 struct workqueue_struct *ocfs2_wq = NULL; 78 79 static struct dentry *ocfs2_debugfs_root = NULL; 80 81 MODULE_AUTHOR("Oracle"); 82 MODULE_LICENSE("GPL"); 83 84 struct mount_options 85 { 86 unsigned long commit_interval; 87 unsigned long mount_opt; 88 unsigned int atime_quantum; 89 signed short slot; 90 unsigned int localalloc_opt; 91 }; 92 93 static int ocfs2_parse_options(struct super_block *sb, char *options, 94 struct mount_options *mopt, 95 int is_remount); 96 static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 97 static void ocfs2_put_super(struct super_block *sb); 98 static int ocfs2_mount_volume(struct super_block *sb); 99 static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 100 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); 101 static int ocfs2_initialize_mem_caches(void); 102 static void ocfs2_free_mem_caches(void); 103 static void ocfs2_delete_osb(struct ocfs2_super *osb); 104 105 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf); 106 107 static int ocfs2_sync_fs(struct super_block *sb, int wait); 108 109 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 110 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 111 static void ocfs2_release_system_inodes(struct ocfs2_super *osb); 112 static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); 113 static int ocfs2_check_volume(struct ocfs2_super *osb); 114 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 115 struct buffer_head *bh, 116 u32 sectsize); 117 static int ocfs2_initialize_super(struct super_block *sb, 118 struct buffer_head *bh, 119 int sector_size); 120 static int ocfs2_get_sector(struct super_block *sb, 121 struct buffer_head **bh, 122 int block, 123 int sect_size); 124 static void ocfs2_write_super(struct super_block *sb); 125 static struct inode *ocfs2_alloc_inode(struct super_block *sb); 126 static void ocfs2_destroy_inode(struct inode *inode); 127 128 static const struct super_operations ocfs2_sops = { 129 .statfs = ocfs2_statfs, 130 .alloc_inode = ocfs2_alloc_inode, 131 .destroy_inode = ocfs2_destroy_inode, 132 .drop_inode = ocfs2_drop_inode, 133 .clear_inode = ocfs2_clear_inode, 134 .delete_inode = ocfs2_delete_inode, 135 .sync_fs = ocfs2_sync_fs, 136 .write_super = ocfs2_write_super, 137 .put_super = ocfs2_put_super, 138 .remount_fs = ocfs2_remount, 139 .show_options = ocfs2_show_options, 140 }; 141 142 enum { 143 Opt_barrier, 144 Opt_err_panic, 145 Opt_err_ro, 146 Opt_intr, 147 Opt_nointr, 148 Opt_hb_none, 149 Opt_hb_local, 150 Opt_data_ordered, 151 Opt_data_writeback, 152 Opt_atime_quantum, 153 Opt_slot, 154 Opt_commit, 155 Opt_localalloc, 156 Opt_err, 157 }; 158 159 static match_table_t tokens = { 160 {Opt_barrier, "barrier=%u"}, 161 {Opt_err_panic, "errors=panic"}, 162 {Opt_err_ro, "errors=remount-ro"}, 163 {Opt_intr, "intr"}, 164 {Opt_nointr, "nointr"}, 165 {Opt_hb_none, OCFS2_HB_NONE}, 166 {Opt_hb_local, OCFS2_HB_LOCAL}, 167 {Opt_data_ordered, "data=ordered"}, 168 {Opt_data_writeback, "data=writeback"}, 169 {Opt_atime_quantum, "atime_quantum=%u"}, 170 {Opt_slot, "preferred_slot=%u"}, 171 {Opt_commit, "commit=%u"}, 172 {Opt_localalloc, "localalloc=%d"}, 173 {Opt_err, NULL} 174 }; 175 176 /* 177 * write_super and sync_fs ripped right out of ext3. 178 */ 179 static void ocfs2_write_super(struct super_block *sb) 180 { 181 if (mutex_trylock(&sb->s_lock) != 0) 182 BUG(); 183 sb->s_dirt = 0; 184 } 185 186 static int ocfs2_sync_fs(struct super_block *sb, int wait) 187 { 188 int status; 189 tid_t target; 190 struct ocfs2_super *osb = OCFS2_SB(sb); 191 192 sb->s_dirt = 0; 193 194 if (ocfs2_is_hard_readonly(osb)) 195 return -EROFS; 196 197 if (wait) { 198 status = ocfs2_flush_truncate_log(osb); 199 if (status < 0) 200 mlog_errno(status); 201 } else { 202 ocfs2_schedule_truncate_log_flush(osb, 0); 203 } 204 205 if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { 206 if (wait) 207 log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 208 target); 209 } 210 return 0; 211 } 212 213 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) 214 { 215 struct inode *new = NULL; 216 int status = 0; 217 int i; 218 219 mlog_entry_void(); 220 221 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE); 222 if (IS_ERR(new)) { 223 status = PTR_ERR(new); 224 mlog_errno(status); 225 goto bail; 226 } 227 osb->root_inode = new; 228 229 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE); 230 if (IS_ERR(new)) { 231 status = PTR_ERR(new); 232 mlog_errno(status); 233 goto bail; 234 } 235 osb->sys_root_inode = new; 236 237 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; 238 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { 239 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 240 if (!new) { 241 ocfs2_release_system_inodes(osb); 242 status = -EINVAL; 243 mlog_errno(status); 244 /* FIXME: Should ERROR_RO_FS */ 245 mlog(ML_ERROR, "Unable to load system inode %d, " 246 "possibly corrupt fs?", i); 247 goto bail; 248 } 249 // the array now has one ref, so drop this one 250 iput(new); 251 } 252 253 bail: 254 mlog_exit(status); 255 return status; 256 } 257 258 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) 259 { 260 struct inode *new = NULL; 261 int status = 0; 262 int i; 263 264 mlog_entry_void(); 265 266 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 267 i < NUM_SYSTEM_INODES; 268 i++) { 269 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 270 if (!new) { 271 ocfs2_release_system_inodes(osb); 272 status = -EINVAL; 273 mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", 274 status, i, osb->slot_num); 275 goto bail; 276 } 277 /* the array now has one ref, so drop this one */ 278 iput(new); 279 } 280 281 bail: 282 mlog_exit(status); 283 return status; 284 } 285 286 static void ocfs2_release_system_inodes(struct ocfs2_super *osb) 287 { 288 int i; 289 struct inode *inode; 290 291 mlog_entry_void(); 292 293 for (i = 0; i < NUM_SYSTEM_INODES; i++) { 294 inode = osb->system_inodes[i]; 295 if (inode) { 296 iput(inode); 297 osb->system_inodes[i] = NULL; 298 } 299 } 300 301 inode = osb->sys_root_inode; 302 if (inode) { 303 iput(inode); 304 osb->sys_root_inode = NULL; 305 } 306 307 inode = osb->root_inode; 308 if (inode) { 309 iput(inode); 310 osb->root_inode = NULL; 311 } 312 313 mlog_exit(0); 314 } 315 316 /* We're allocating fs objects, use GFP_NOFS */ 317 static struct inode *ocfs2_alloc_inode(struct super_block *sb) 318 { 319 struct ocfs2_inode_info *oi; 320 321 oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS); 322 if (!oi) 323 return NULL; 324 325 return &oi->vfs_inode; 326 } 327 328 static void ocfs2_destroy_inode(struct inode *inode) 329 { 330 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 331 } 332 333 static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 334 unsigned int cbits) 335 { 336 unsigned int bytes = 1 << cbits; 337 unsigned int trim = bytes; 338 unsigned int bitshift = 32; 339 340 /* 341 * i_size and all block offsets in ocfs2 are always 64 bits 342 * wide. i_clusters is 32 bits, in cluster-sized units. So on 343 * 64 bit platforms, cluster size will be the limiting factor. 344 */ 345 346 #if BITS_PER_LONG == 32 347 # if defined(CONFIG_LBD) 348 BUILD_BUG_ON(sizeof(sector_t) != 8); 349 /* 350 * We might be limited by page cache size. 351 */ 352 if (bytes > PAGE_CACHE_SIZE) { 353 bytes = PAGE_CACHE_SIZE; 354 trim = 1; 355 /* 356 * Shift by 31 here so that we don't get larger than 357 * MAX_LFS_FILESIZE 358 */ 359 bitshift = 31; 360 } 361 # else 362 /* 363 * We are limited by the size of sector_t. Use block size, as 364 * that's what we expose to the VFS. 365 */ 366 bytes = 1 << bbits; 367 trim = 1; 368 bitshift = 31; 369 # endif 370 #endif 371 372 /* 373 * Trim by a whole cluster when we can actually approach the 374 * on-disk limits. Otherwise we can overflow i_clusters when 375 * an extent start is at the max offset. 376 */ 377 return (((unsigned long long)bytes) << bitshift) - trim; 378 } 379 380 static int ocfs2_remount(struct super_block *sb, int *flags, char *data) 381 { 382 int incompat_features; 383 int ret = 0; 384 struct mount_options parsed_options; 385 struct ocfs2_super *osb = OCFS2_SB(sb); 386 387 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 388 ret = -EINVAL; 389 goto out; 390 } 391 392 if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != 393 (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 394 ret = -EINVAL; 395 mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); 396 goto out; 397 } 398 399 if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != 400 (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { 401 ret = -EINVAL; 402 mlog(ML_ERROR, "Cannot change data mode on remount\n"); 403 goto out; 404 } 405 406 /* We're going to/from readonly mode. */ 407 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 408 /* Lock here so the check of HARD_RO and the potential 409 * setting of SOFT_RO is atomic. */ 410 spin_lock(&osb->osb_lock); 411 if (osb->osb_flags & OCFS2_OSB_HARD_RO) { 412 mlog(ML_ERROR, "Remount on readonly device is forbidden.\n"); 413 ret = -EROFS; 414 goto unlock_osb; 415 } 416 417 if (*flags & MS_RDONLY) { 418 mlog(0, "Going to ro mode.\n"); 419 sb->s_flags |= MS_RDONLY; 420 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 421 } else { 422 mlog(0, "Making ro filesystem writeable.\n"); 423 424 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { 425 mlog(ML_ERROR, "Cannot remount RDWR " 426 "filesystem due to previous errors.\n"); 427 ret = -EROFS; 428 goto unlock_osb; 429 } 430 incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP); 431 if (incompat_features) { 432 mlog(ML_ERROR, "Cannot remount RDWR because " 433 "of unsupported optional features " 434 "(%x).\n", incompat_features); 435 ret = -EINVAL; 436 goto unlock_osb; 437 } 438 sb->s_flags &= ~MS_RDONLY; 439 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; 440 } 441 unlock_osb: 442 spin_unlock(&osb->osb_lock); 443 } 444 445 if (!ret) { 446 /* Only save off the new mount options in case of a successful 447 * remount. */ 448 osb->s_mount_opt = parsed_options.mount_opt; 449 osb->s_atime_quantum = parsed_options.atime_quantum; 450 osb->preferred_slot = parsed_options.slot; 451 if (parsed_options.commit_interval) 452 osb->osb_commit_interval = parsed_options.commit_interval; 453 454 if (!ocfs2_is_hard_readonly(osb)) 455 ocfs2_set_journal_params(osb); 456 } 457 out: 458 return ret; 459 } 460 461 static int ocfs2_sb_probe(struct super_block *sb, 462 struct buffer_head **bh, 463 int *sector_size) 464 { 465 int status, tmpstat; 466 struct ocfs1_vol_disk_hdr *hdr; 467 struct ocfs2_dinode *di; 468 int blksize; 469 470 *bh = NULL; 471 472 /* may be > 512 */ 473 *sector_size = bdev_hardsect_size(sb->s_bdev); 474 if (*sector_size > OCFS2_MAX_BLOCKSIZE) { 475 mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", 476 *sector_size, OCFS2_MAX_BLOCKSIZE); 477 status = -EINVAL; 478 goto bail; 479 } 480 481 /* Can this really happen? */ 482 if (*sector_size < OCFS2_MIN_BLOCKSIZE) 483 *sector_size = OCFS2_MIN_BLOCKSIZE; 484 485 /* check block zero for old format */ 486 status = ocfs2_get_sector(sb, bh, 0, *sector_size); 487 if (status < 0) { 488 mlog_errno(status); 489 goto bail; 490 } 491 hdr = (struct ocfs1_vol_disk_hdr *) (*bh)->b_data; 492 if (hdr->major_version == OCFS1_MAJOR_VERSION) { 493 mlog(ML_ERROR, "incompatible version: %u.%u\n", 494 hdr->major_version, hdr->minor_version); 495 status = -EINVAL; 496 } 497 if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE, 498 strlen(OCFS1_VOLUME_SIGNATURE)) == 0) { 499 mlog(ML_ERROR, "incompatible volume signature: %8s\n", 500 hdr->signature); 501 status = -EINVAL; 502 } 503 brelse(*bh); 504 *bh = NULL; 505 if (status < 0) { 506 mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be " 507 "upgraded before mounting with ocfs v2\n"); 508 goto bail; 509 } 510 511 /* 512 * Now check at magic offset for 512, 1024, 2048, 4096 513 * blocksizes. 4096 is the maximum blocksize because it is 514 * the minimum clustersize. 515 */ 516 status = -EINVAL; 517 for (blksize = *sector_size; 518 blksize <= OCFS2_MAX_BLOCKSIZE; 519 blksize <<= 1) { 520 tmpstat = ocfs2_get_sector(sb, bh, 521 OCFS2_SUPER_BLOCK_BLKNO, 522 blksize); 523 if (tmpstat < 0) { 524 status = tmpstat; 525 mlog_errno(status); 526 goto bail; 527 } 528 di = (struct ocfs2_dinode *) (*bh)->b_data; 529 status = ocfs2_verify_volume(di, *bh, blksize); 530 if (status >= 0) 531 goto bail; 532 brelse(*bh); 533 *bh = NULL; 534 if (status != -EAGAIN) 535 break; 536 } 537 538 bail: 539 return status; 540 } 541 542 static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) 543 { 544 if (ocfs2_mount_local(osb)) { 545 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 546 mlog(ML_ERROR, "Cannot heartbeat on a locally " 547 "mounted device.\n"); 548 return -EINVAL; 549 } 550 } 551 552 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 553 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { 554 mlog(ML_ERROR, "Heartbeat has to be started to mount " 555 "a read-write clustered device.\n"); 556 return -EINVAL; 557 } 558 } 559 560 return 0; 561 } 562 563 static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 564 { 565 struct dentry *root; 566 int status, sector_size; 567 struct mount_options parsed_options; 568 struct inode *inode = NULL; 569 struct ocfs2_super *osb = NULL; 570 struct buffer_head *bh = NULL; 571 char nodestr[8]; 572 573 mlog_entry("%p, %p, %i", sb, data, silent); 574 575 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { 576 status = -EINVAL; 577 goto read_super_error; 578 } 579 580 /* for now we only have one cluster/node, make sure we see it 581 * in the heartbeat universe */ 582 if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { 583 if (!o2hb_check_local_node_heartbeating()) { 584 status = -EINVAL; 585 goto read_super_error; 586 } 587 } 588 589 /* probe for superblock */ 590 status = ocfs2_sb_probe(sb, &bh, §or_size); 591 if (status < 0) { 592 mlog(ML_ERROR, "superblock probe failed!\n"); 593 goto read_super_error; 594 } 595 596 status = ocfs2_initialize_super(sb, bh, sector_size); 597 osb = OCFS2_SB(sb); 598 if (status < 0) { 599 mlog_errno(status); 600 goto read_super_error; 601 } 602 brelse(bh); 603 bh = NULL; 604 osb->s_mount_opt = parsed_options.mount_opt; 605 osb->s_atime_quantum = parsed_options.atime_quantum; 606 osb->preferred_slot = parsed_options.slot; 607 osb->osb_commit_interval = parsed_options.commit_interval; 608 osb->local_alloc_size = parsed_options.localalloc_opt; 609 610 sb->s_magic = OCFS2_SUPER_MAGIC; 611 612 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 613 * heartbeat=none */ 614 if (bdev_read_only(sb->s_bdev)) { 615 if (!(sb->s_flags & MS_RDONLY)) { 616 status = -EACCES; 617 mlog(ML_ERROR, "Readonly device detected but readonly " 618 "mount was not specified.\n"); 619 goto read_super_error; 620 } 621 622 /* You should not be able to start a local heartbeat 623 * on a readonly device. */ 624 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 625 status = -EROFS; 626 mlog(ML_ERROR, "Local heartbeat specified on readonly " 627 "device.\n"); 628 goto read_super_error; 629 } 630 631 status = ocfs2_check_journals_nolocks(osb); 632 if (status < 0) { 633 if (status == -EROFS) 634 mlog(ML_ERROR, "Recovery required on readonly " 635 "file system, but write access is " 636 "unavailable.\n"); 637 else 638 mlog_errno(status); 639 goto read_super_error; 640 } 641 642 ocfs2_set_ro_flag(osb, 1); 643 644 printk(KERN_NOTICE "Readonly device detected. No cluster " 645 "services will be utilized for this mount. Recovery " 646 "will be skipped.\n"); 647 } 648 649 if (!ocfs2_is_hard_readonly(osb)) { 650 if (sb->s_flags & MS_RDONLY) 651 ocfs2_set_ro_flag(osb, 0); 652 } 653 654 status = ocfs2_verify_heartbeat(osb); 655 if (status < 0) { 656 mlog_errno(status); 657 goto read_super_error; 658 } 659 660 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, 661 ocfs2_debugfs_root); 662 if (!osb->osb_debug_root) { 663 status = -EINVAL; 664 mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); 665 goto read_super_error; 666 } 667 668 status = ocfs2_mount_volume(sb); 669 if (osb->root_inode) 670 inode = igrab(osb->root_inode); 671 672 if (status < 0) 673 goto read_super_error; 674 675 if (!inode) { 676 status = -EIO; 677 mlog_errno(status); 678 goto read_super_error; 679 } 680 681 root = d_alloc_root(inode); 682 if (!root) { 683 status = -ENOMEM; 684 mlog_errno(status); 685 goto read_super_error; 686 } 687 688 sb->s_root = root; 689 690 ocfs2_complete_mount_recovery(osb); 691 692 if (ocfs2_mount_local(osb)) 693 snprintf(nodestr, sizeof(nodestr), "local"); 694 else 695 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 696 697 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " 698 "with %s data mode.\n", 699 osb->dev_str, nodestr, osb->slot_num, 700 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 701 "ordered"); 702 703 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 704 wake_up(&osb->osb_mount_event); 705 706 mlog_exit(status); 707 return status; 708 709 read_super_error: 710 if (bh != NULL) 711 brelse(bh); 712 713 if (inode) 714 iput(inode); 715 716 if (osb) { 717 atomic_set(&osb->vol_state, VOLUME_DISABLED); 718 wake_up(&osb->osb_mount_event); 719 ocfs2_dismount_volume(sb, 1); 720 } 721 722 mlog_exit(status); 723 return status; 724 } 725 726 static int ocfs2_get_sb(struct file_system_type *fs_type, 727 int flags, 728 const char *dev_name, 729 void *data, 730 struct vfsmount *mnt) 731 { 732 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, 733 mnt); 734 } 735 736 static struct file_system_type ocfs2_fs_type = { 737 .owner = THIS_MODULE, 738 .name = "ocfs2", 739 .get_sb = ocfs2_get_sb, /* is this called when we mount 740 * the fs? */ 741 .kill_sb = kill_block_super, /* set to the generic one 742 * right now, but do we 743 * need to change that? */ 744 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 745 .next = NULL 746 }; 747 748 static int ocfs2_parse_options(struct super_block *sb, 749 char *options, 750 struct mount_options *mopt, 751 int is_remount) 752 { 753 int status; 754 char *p; 755 756 mlog_entry("remount: %d, options: \"%s\"\n", is_remount, 757 options ? options : "(none)"); 758 759 mopt->commit_interval = 0; 760 mopt->mount_opt = 0; 761 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 762 mopt->slot = OCFS2_INVALID_SLOT; 763 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 764 765 if (!options) { 766 status = 1; 767 goto bail; 768 } 769 770 while ((p = strsep(&options, ",")) != NULL) { 771 int token, option; 772 substring_t args[MAX_OPT_ARGS]; 773 774 if (!*p) 775 continue; 776 777 token = match_token(p, tokens, args); 778 switch (token) { 779 case Opt_hb_local: 780 mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; 781 break; 782 case Opt_hb_none: 783 mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; 784 break; 785 case Opt_barrier: 786 if (match_int(&args[0], &option)) { 787 status = 0; 788 goto bail; 789 } 790 if (option) 791 mopt->mount_opt |= OCFS2_MOUNT_BARRIER; 792 else 793 mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; 794 break; 795 case Opt_intr: 796 mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; 797 break; 798 case Opt_nointr: 799 mopt->mount_opt |= OCFS2_MOUNT_NOINTR; 800 break; 801 case Opt_err_panic: 802 mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 803 break; 804 case Opt_err_ro: 805 mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; 806 break; 807 case Opt_data_ordered: 808 mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; 809 break; 810 case Opt_data_writeback: 811 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; 812 break; 813 case Opt_atime_quantum: 814 if (match_int(&args[0], &option)) { 815 status = 0; 816 goto bail; 817 } 818 if (option >= 0) 819 mopt->atime_quantum = option; 820 break; 821 case Opt_slot: 822 option = 0; 823 if (match_int(&args[0], &option)) { 824 status = 0; 825 goto bail; 826 } 827 if (option) 828 mopt->slot = (s16)option; 829 break; 830 case Opt_commit: 831 option = 0; 832 if (match_int(&args[0], &option)) { 833 status = 0; 834 goto bail; 835 } 836 if (option < 0) 837 return 0; 838 if (option == 0) 839 option = JBD_DEFAULT_MAX_COMMIT_AGE; 840 mopt->commit_interval = HZ * option; 841 break; 842 case Opt_localalloc: 843 option = 0; 844 if (match_int(&args[0], &option)) { 845 status = 0; 846 goto bail; 847 } 848 if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) 849 mopt->localalloc_opt = option; 850 break; 851 default: 852 mlog(ML_ERROR, 853 "Unrecognized mount option \"%s\" " 854 "or missing value\n", p); 855 status = 0; 856 goto bail; 857 } 858 } 859 860 status = 1; 861 862 bail: 863 mlog_exit(status); 864 return status; 865 } 866 867 static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 868 { 869 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 870 unsigned long opts = osb->s_mount_opt; 871 872 if (opts & OCFS2_MOUNT_HB_LOCAL) 873 seq_printf(s, ",_netdev,heartbeat=local"); 874 else 875 seq_printf(s, ",heartbeat=none"); 876 877 if (opts & OCFS2_MOUNT_NOINTR) 878 seq_printf(s, ",nointr"); 879 880 if (opts & OCFS2_MOUNT_DATA_WRITEBACK) 881 seq_printf(s, ",data=writeback"); 882 else 883 seq_printf(s, ",data=ordered"); 884 885 if (opts & OCFS2_MOUNT_BARRIER) 886 seq_printf(s, ",barrier=1"); 887 888 if (opts & OCFS2_MOUNT_ERRORS_PANIC) 889 seq_printf(s, ",errors=panic"); 890 else 891 seq_printf(s, ",errors=remount-ro"); 892 893 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 894 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 895 896 if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) 897 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); 898 899 if (osb->osb_commit_interval) 900 seq_printf(s, ",commit=%u", 901 (unsigned) (osb->osb_commit_interval / HZ)); 902 903 if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 904 seq_printf(s, ",localalloc=%d", osb->local_alloc_size); 905 906 return 0; 907 } 908 909 static int __init ocfs2_init(void) 910 { 911 int status; 912 913 mlog_entry_void(); 914 915 ocfs2_print_version(); 916 917 status = init_ocfs2_uptodate_cache(); 918 if (status < 0) { 919 mlog_errno(status); 920 goto leave; 921 } 922 923 status = ocfs2_initialize_mem_caches(); 924 if (status < 0) { 925 mlog_errno(status); 926 goto leave; 927 } 928 929 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); 930 if (!ocfs2_wq) { 931 status = -ENOMEM; 932 goto leave; 933 } 934 935 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 936 if (!ocfs2_debugfs_root) { 937 status = -EFAULT; 938 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 939 } 940 941 leave: 942 if (status < 0) { 943 ocfs2_free_mem_caches(); 944 exit_ocfs2_uptodate_cache(); 945 } 946 947 mlog_exit(status); 948 949 if (status >= 0) { 950 return register_filesystem(&ocfs2_fs_type); 951 } else 952 return -1; 953 } 954 955 static void __exit ocfs2_exit(void) 956 { 957 mlog_entry_void(); 958 959 if (ocfs2_wq) { 960 flush_workqueue(ocfs2_wq); 961 destroy_workqueue(ocfs2_wq); 962 } 963 964 debugfs_remove(ocfs2_debugfs_root); 965 966 ocfs2_free_mem_caches(); 967 968 unregister_filesystem(&ocfs2_fs_type); 969 970 exit_ocfs2_uptodate_cache(); 971 972 mlog_exit_void(); 973 } 974 975 static void ocfs2_put_super(struct super_block *sb) 976 { 977 mlog_entry("(0x%p)\n", sb); 978 979 ocfs2_sync_blockdev(sb); 980 ocfs2_dismount_volume(sb, 0); 981 982 mlog_exit_void(); 983 } 984 985 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 986 { 987 struct ocfs2_super *osb; 988 u32 numbits, freebits; 989 int status; 990 struct ocfs2_dinode *bm_lock; 991 struct buffer_head *bh = NULL; 992 struct inode *inode = NULL; 993 994 mlog_entry("(%p, %p)\n", dentry->d_sb, buf); 995 996 osb = OCFS2_SB(dentry->d_sb); 997 998 inode = ocfs2_get_system_file_inode(osb, 999 GLOBAL_BITMAP_SYSTEM_INODE, 1000 OCFS2_INVALID_SLOT); 1001 if (!inode) { 1002 mlog(ML_ERROR, "failed to get bitmap inode\n"); 1003 status = -EIO; 1004 goto bail; 1005 } 1006 1007 status = ocfs2_inode_lock(inode, &bh, 0); 1008 if (status < 0) { 1009 mlog_errno(status); 1010 goto bail; 1011 } 1012 1013 bm_lock = (struct ocfs2_dinode *) bh->b_data; 1014 1015 numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total); 1016 freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); 1017 1018 buf->f_type = OCFS2_SUPER_MAGIC; 1019 buf->f_bsize = dentry->d_sb->s_blocksize; 1020 buf->f_namelen = OCFS2_MAX_FILENAME_LEN; 1021 buf->f_blocks = ((sector_t) numbits) * 1022 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1023 buf->f_bfree = ((sector_t) freebits) * 1024 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1025 buf->f_bavail = buf->f_bfree; 1026 buf->f_files = numbits; 1027 buf->f_ffree = freebits; 1028 1029 brelse(bh); 1030 1031 ocfs2_inode_unlock(inode, 0); 1032 status = 0; 1033 bail: 1034 if (inode) 1035 iput(inode); 1036 1037 mlog_exit(status); 1038 1039 return status; 1040 } 1041 1042 static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) 1043 { 1044 struct ocfs2_inode_info *oi = data; 1045 1046 oi->ip_flags = 0; 1047 oi->ip_open_count = 0; 1048 spin_lock_init(&oi->ip_lock); 1049 ocfs2_extent_map_init(&oi->vfs_inode); 1050 INIT_LIST_HEAD(&oi->ip_io_markers); 1051 oi->ip_created_trans = 0; 1052 oi->ip_last_trans = 0; 1053 oi->ip_dir_start_lookup = 0; 1054 1055 init_rwsem(&oi->ip_alloc_sem); 1056 mutex_init(&oi->ip_io_mutex); 1057 1058 oi->ip_blkno = 0ULL; 1059 oi->ip_clusters = 0; 1060 1061 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1062 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1063 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1064 1065 ocfs2_metadata_cache_init(&oi->vfs_inode); 1066 1067 inode_init_once(&oi->vfs_inode); 1068 } 1069 1070 static int ocfs2_initialize_mem_caches(void) 1071 { 1072 ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", 1073 sizeof(struct ocfs2_inode_info), 1074 0, 1075 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 1076 SLAB_MEM_SPREAD), 1077 ocfs2_inode_init_once); 1078 if (!ocfs2_inode_cachep) 1079 return -ENOMEM; 1080 1081 return 0; 1082 } 1083 1084 static void ocfs2_free_mem_caches(void) 1085 { 1086 if (ocfs2_inode_cachep) 1087 kmem_cache_destroy(ocfs2_inode_cachep); 1088 1089 ocfs2_inode_cachep = NULL; 1090 } 1091 1092 static int ocfs2_get_sector(struct super_block *sb, 1093 struct buffer_head **bh, 1094 int block, 1095 int sect_size) 1096 { 1097 if (!sb_set_blocksize(sb, sect_size)) { 1098 mlog(ML_ERROR, "unable to set blocksize\n"); 1099 return -EIO; 1100 } 1101 1102 *bh = sb_getblk(sb, block); 1103 if (!*bh) { 1104 mlog_errno(-EIO); 1105 return -EIO; 1106 } 1107 lock_buffer(*bh); 1108 if (!buffer_dirty(*bh)) 1109 clear_buffer_uptodate(*bh); 1110 unlock_buffer(*bh); 1111 ll_rw_block(READ, 1, bh); 1112 wait_on_buffer(*bh); 1113 return 0; 1114 } 1115 1116 /* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ 1117 static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) 1118 { 1119 int status; 1120 1121 /* XXX hold a ref on the node while mounte? easy enough, if 1122 * desirable. */ 1123 if (ocfs2_mount_local(osb)) 1124 osb->node_num = 0; 1125 else 1126 osb->node_num = o2nm_this_node(); 1127 1128 if (osb->node_num == O2NM_MAX_NODES) { 1129 mlog(ML_ERROR, "could not find this host's node number\n"); 1130 status = -ENOENT; 1131 goto bail; 1132 } 1133 1134 mlog(0, "I am node %d\n", osb->node_num); 1135 1136 status = 0; 1137 bail: 1138 return status; 1139 } 1140 1141 static int ocfs2_mount_volume(struct super_block *sb) 1142 { 1143 int status = 0; 1144 int unlock_super = 0; 1145 struct ocfs2_super *osb = OCFS2_SB(sb); 1146 1147 mlog_entry_void(); 1148 1149 if (ocfs2_is_hard_readonly(osb)) 1150 goto leave; 1151 1152 status = ocfs2_fill_local_node_info(osb); 1153 if (status < 0) { 1154 mlog_errno(status); 1155 goto leave; 1156 } 1157 1158 status = ocfs2_dlm_init(osb); 1159 if (status < 0) { 1160 mlog_errno(status); 1161 goto leave; 1162 } 1163 1164 status = ocfs2_super_lock(osb, 1); 1165 if (status < 0) { 1166 mlog_errno(status); 1167 goto leave; 1168 } 1169 unlock_super = 1; 1170 1171 /* This will load up the node map and add ourselves to it. */ 1172 status = ocfs2_find_slot(osb); 1173 if (status < 0) { 1174 mlog_errno(status); 1175 goto leave; 1176 } 1177 1178 /* load all node-local system inodes */ 1179 status = ocfs2_init_local_system_inodes(osb); 1180 if (status < 0) { 1181 mlog_errno(status); 1182 goto leave; 1183 } 1184 1185 status = ocfs2_check_volume(osb); 1186 if (status < 0) { 1187 mlog_errno(status); 1188 goto leave; 1189 } 1190 1191 status = ocfs2_truncate_log_init(osb); 1192 if (status < 0) { 1193 mlog_errno(status); 1194 goto leave; 1195 } 1196 1197 if (ocfs2_mount_local(osb)) 1198 goto leave; 1199 1200 leave: 1201 if (unlock_super) 1202 ocfs2_super_unlock(osb, 1); 1203 1204 mlog_exit(status); 1205 return status; 1206 } 1207 1208 /* we can't grab the goofy sem lock from inside wait_event, so we use 1209 * memory barriers to make sure that we'll see the null task before 1210 * being woken up */ 1211 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) 1212 { 1213 mb(); 1214 return osb->recovery_thread_task != NULL; 1215 } 1216 1217 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) 1218 { 1219 int tmp; 1220 struct ocfs2_super *osb = NULL; 1221 char nodestr[8]; 1222 1223 mlog_entry("(0x%p)\n", sb); 1224 1225 BUG_ON(!sb); 1226 osb = OCFS2_SB(sb); 1227 BUG_ON(!osb); 1228 1229 ocfs2_shutdown_local_alloc(osb); 1230 1231 ocfs2_truncate_log_shutdown(osb); 1232 1233 /* disable any new recovery threads and wait for any currently 1234 * running ones to exit. Do this before setting the vol_state. */ 1235 mutex_lock(&osb->recovery_lock); 1236 osb->disable_recovery = 1; 1237 mutex_unlock(&osb->recovery_lock); 1238 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 1239 1240 /* At this point, we know that no more recovery threads can be 1241 * launched, so wait for any recovery completion work to 1242 * complete. */ 1243 flush_workqueue(ocfs2_wq); 1244 1245 ocfs2_journal_shutdown(osb); 1246 1247 ocfs2_sync_blockdev(sb); 1248 1249 /* No dlm means we've failed during mount, so skip all the 1250 * steps which depended on that to complete. */ 1251 if (osb->dlm) { 1252 tmp = ocfs2_super_lock(osb, 1); 1253 if (tmp < 0) { 1254 mlog_errno(tmp); 1255 return; 1256 } 1257 } 1258 1259 if (osb->slot_num != OCFS2_INVALID_SLOT) 1260 ocfs2_put_slot(osb); 1261 1262 if (osb->dlm) 1263 ocfs2_super_unlock(osb, 1); 1264 1265 ocfs2_release_system_inodes(osb); 1266 1267 if (osb->dlm) 1268 ocfs2_dlm_shutdown(osb); 1269 1270 debugfs_remove(osb->osb_debug_root); 1271 1272 if (!mnt_err) 1273 ocfs2_stop_heartbeat(osb); 1274 1275 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1276 1277 if (ocfs2_mount_local(osb)) 1278 snprintf(nodestr, sizeof(nodestr), "local"); 1279 else 1280 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 1281 1282 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", 1283 osb->dev_str, nodestr); 1284 1285 ocfs2_delete_osb(osb); 1286 kfree(osb); 1287 sb->s_dev = 0; 1288 sb->s_fs_info = NULL; 1289 } 1290 1291 static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uuid, 1292 unsigned uuid_bytes) 1293 { 1294 int i, ret; 1295 char *ptr; 1296 1297 BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN); 1298 1299 osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL); 1300 if (osb->uuid_str == NULL) 1301 return -ENOMEM; 1302 1303 for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { 1304 /* print with null */ 1305 ret = snprintf(ptr, 3, "%02X", uuid[i]); 1306 if (ret != 2) /* drop super cleans up */ 1307 return -EINVAL; 1308 /* then only advance past the last char */ 1309 ptr += 2; 1310 } 1311 1312 return 0; 1313 } 1314 1315 static int ocfs2_initialize_super(struct super_block *sb, 1316 struct buffer_head *bh, 1317 int sector_size) 1318 { 1319 int status; 1320 int i, cbits, bbits; 1321 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1322 struct inode *inode = NULL; 1323 struct ocfs2_journal *journal; 1324 __le32 uuid_net_key; 1325 struct ocfs2_super *osb; 1326 1327 mlog_entry_void(); 1328 1329 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); 1330 if (!osb) { 1331 status = -ENOMEM; 1332 mlog_errno(status); 1333 goto bail; 1334 } 1335 1336 sb->s_fs_info = osb; 1337 sb->s_op = &ocfs2_sops; 1338 sb->s_export_op = &ocfs2_export_ops; 1339 sb->s_time_gran = 1; 1340 sb->s_flags |= MS_NOATIME; 1341 /* this is needed to support O_LARGEFILE */ 1342 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1343 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 1344 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); 1345 1346 osb->sb = sb; 1347 /* Save off for ocfs2_rw_direct */ 1348 osb->s_sectsize_bits = blksize_bits(sector_size); 1349 BUG_ON(!osb->s_sectsize_bits); 1350 1351 init_waitqueue_head(&osb->recovery_event); 1352 spin_lock_init(&osb->dc_task_lock); 1353 init_waitqueue_head(&osb->dc_event); 1354 osb->dc_work_sequence = 0; 1355 osb->dc_wake_sequence = 0; 1356 INIT_LIST_HEAD(&osb->blocked_lock_list); 1357 osb->blocked_lock_count = 0; 1358 spin_lock_init(&osb->osb_lock); 1359 1360 atomic_set(&osb->alloc_stats.moves, 0); 1361 atomic_set(&osb->alloc_stats.local_data, 0); 1362 atomic_set(&osb->alloc_stats.bitmap_data, 0); 1363 atomic_set(&osb->alloc_stats.bg_allocs, 0); 1364 atomic_set(&osb->alloc_stats.bg_extends, 0); 1365 1366 ocfs2_init_node_maps(osb); 1367 1368 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 1369 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1370 1371 mutex_init(&osb->recovery_lock); 1372 1373 osb->disable_recovery = 0; 1374 osb->recovery_thread_task = NULL; 1375 1376 init_waitqueue_head(&osb->checkpoint_event); 1377 atomic_set(&osb->needs_checkpoint, 0); 1378 1379 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1380 1381 osb->node_num = O2NM_INVALID_NODE_NUM; 1382 osb->slot_num = OCFS2_INVALID_SLOT; 1383 1384 osb->local_alloc_state = OCFS2_LA_UNUSED; 1385 osb->local_alloc_bh = NULL; 1386 1387 ocfs2_setup_hb_callbacks(osb); 1388 1389 init_waitqueue_head(&osb->osb_mount_event); 1390 1391 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 1392 if (!osb->vol_label) { 1393 mlog(ML_ERROR, "unable to alloc vol label\n"); 1394 status = -ENOMEM; 1395 goto bail; 1396 } 1397 1398 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); 1399 if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { 1400 mlog(ML_ERROR, "Invalid number of node slots (%u)\n", 1401 osb->max_slots); 1402 status = -EINVAL; 1403 goto bail; 1404 } 1405 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1406 1407 init_waitqueue_head(&osb->osb_wipe_event); 1408 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1409 sizeof(*osb->osb_orphan_wipes), 1410 GFP_KERNEL); 1411 if (!osb->osb_orphan_wipes) { 1412 status = -ENOMEM; 1413 mlog_errno(status); 1414 goto bail; 1415 } 1416 1417 osb->s_feature_compat = 1418 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 1419 osb->s_feature_ro_compat = 1420 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat); 1421 osb->s_feature_incompat = 1422 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat); 1423 1424 if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) { 1425 mlog(ML_ERROR, "couldn't mount because of unsupported " 1426 "optional features (%x).\n", i); 1427 status = -EINVAL; 1428 goto bail; 1429 } 1430 if (!(osb->sb->s_flags & MS_RDONLY) && 1431 (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { 1432 mlog(ML_ERROR, "couldn't mount RDWR because of " 1433 "unsupported optional features (%x).\n", i); 1434 status = -EINVAL; 1435 goto bail; 1436 } 1437 1438 get_random_bytes(&osb->s_next_generation, sizeof(u32)); 1439 1440 /* FIXME 1441 * This should be done in ocfs2_journal_init(), but unknown 1442 * ordering issues will cause the filesystem to crash. 1443 * If anyone wants to figure out what part of the code 1444 * refers to osb->journal before ocfs2_journal_init() is run, 1445 * be my guest. 1446 */ 1447 /* initialize our journal structure */ 1448 1449 journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); 1450 if (!journal) { 1451 mlog(ML_ERROR, "unable to alloc journal\n"); 1452 status = -ENOMEM; 1453 goto bail; 1454 } 1455 osb->journal = journal; 1456 journal->j_osb = osb; 1457 1458 atomic_set(&journal->j_num_trans, 0); 1459 init_rwsem(&journal->j_trans_barrier); 1460 init_waitqueue_head(&journal->j_checkpointed); 1461 spin_lock_init(&journal->j_lock); 1462 journal->j_trans_id = (unsigned long) 1; 1463 INIT_LIST_HEAD(&journal->j_la_cleanups); 1464 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); 1465 journal->j_state = OCFS2_JOURNAL_FREE; 1466 1467 /* get some pseudo constants for clustersize bits */ 1468 osb->s_clustersize_bits = 1469 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1470 osb->s_clustersize = 1 << osb->s_clustersize_bits; 1471 mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits); 1472 1473 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || 1474 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { 1475 mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n", 1476 osb->s_clustersize); 1477 status = -EINVAL; 1478 goto bail; 1479 } 1480 1481 if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) 1482 > (u32)~0UL) { 1483 mlog(ML_ERROR, "Volume might try to write to blocks beyond " 1484 "what jbd can address in 32 bits.\n"); 1485 status = -EINVAL; 1486 goto bail; 1487 } 1488 1489 if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, 1490 sizeof(di->id2.i_super.s_uuid))) { 1491 mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); 1492 status = -ENOMEM; 1493 goto bail; 1494 } 1495 1496 memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); 1497 1498 strncpy(osb->vol_label, di->id2.i_super.s_label, 63); 1499 osb->vol_label[63] = '\0'; 1500 osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); 1501 osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); 1502 osb->first_cluster_group_blkno = 1503 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 1504 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 1505 mlog(0, "vol_label: %s\n", osb->vol_label); 1506 mlog(0, "uuid: %s\n", osb->uuid_str); 1507 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 1508 (unsigned long long)osb->root_blkno, 1509 (unsigned long long)osb->system_dir_blkno); 1510 1511 osb->osb_dlm_debug = ocfs2_new_dlm_debug(); 1512 if (!osb->osb_dlm_debug) { 1513 status = -ENOMEM; 1514 mlog_errno(status); 1515 goto bail; 1516 } 1517 1518 atomic_set(&osb->vol_state, VOLUME_INIT); 1519 1520 /* load root, system_dir, and all global system inodes */ 1521 status = ocfs2_init_global_system_inodes(osb); 1522 if (status < 0) { 1523 mlog_errno(status); 1524 goto bail; 1525 } 1526 1527 /* 1528 * global bitmap 1529 */ 1530 inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, 1531 OCFS2_INVALID_SLOT); 1532 if (!inode) { 1533 status = -EINVAL; 1534 mlog_errno(status); 1535 goto bail; 1536 } 1537 1538 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 1539 iput(inode); 1540 1541 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 1542 1543 status = ocfs2_init_slot_info(osb); 1544 if (status < 0) { 1545 mlog_errno(status); 1546 goto bail; 1547 } 1548 1549 bail: 1550 mlog_exit(status); 1551 return status; 1552 } 1553 1554 /* 1555 * will return: -EAGAIN if it is ok to keep searching for superblocks 1556 * -EINVAL if there is a bad superblock 1557 * 0 on success 1558 */ 1559 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 1560 struct buffer_head *bh, 1561 u32 blksz) 1562 { 1563 int status = -EAGAIN; 1564 1565 mlog_entry_void(); 1566 1567 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 1568 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 1569 status = -EINVAL; 1570 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { 1571 mlog(ML_ERROR, "found superblock with incorrect block " 1572 "size: found %u, should be %u\n", 1573 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits), 1574 blksz); 1575 } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != 1576 OCFS2_MAJOR_REV_LEVEL || 1577 le16_to_cpu(di->id2.i_super.s_minor_rev_level) != 1578 OCFS2_MINOR_REV_LEVEL) { 1579 mlog(ML_ERROR, "found superblock with bad version: " 1580 "found %u.%u, should be %u.%u\n", 1581 le16_to_cpu(di->id2.i_super.s_major_rev_level), 1582 le16_to_cpu(di->id2.i_super.s_minor_rev_level), 1583 OCFS2_MAJOR_REV_LEVEL, 1584 OCFS2_MINOR_REV_LEVEL); 1585 } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { 1586 mlog(ML_ERROR, "bad block number on superblock: " 1587 "found %llu, should be %llu\n", 1588 (unsigned long long)le64_to_cpu(di->i_blkno), 1589 (unsigned long long)bh->b_blocknr); 1590 } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || 1591 le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { 1592 mlog(ML_ERROR, "bad cluster size found: %u\n", 1593 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)); 1594 } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { 1595 mlog(ML_ERROR, "bad root_blkno: 0\n"); 1596 } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { 1597 mlog(ML_ERROR, "bad system_dir_blkno: 0\n"); 1598 } else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) { 1599 mlog(ML_ERROR, 1600 "Superblock slots found greater than file system " 1601 "maximum: found %u, max %u\n", 1602 le16_to_cpu(di->id2.i_super.s_max_slots), 1603 OCFS2_MAX_SLOTS); 1604 } else { 1605 /* found it! */ 1606 status = 0; 1607 } 1608 } 1609 1610 mlog_exit(status); 1611 return status; 1612 } 1613 1614 static int ocfs2_check_volume(struct ocfs2_super *osb) 1615 { 1616 int status; 1617 int dirty; 1618 int local; 1619 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1620 * recover 1621 * ourselves. */ 1622 1623 mlog_entry_void(); 1624 1625 /* Init our journal object. */ 1626 status = ocfs2_journal_init(osb->journal, &dirty); 1627 if (status < 0) { 1628 mlog(ML_ERROR, "Could not initialize journal!\n"); 1629 goto finally; 1630 } 1631 1632 /* If the journal was unmounted cleanly then we don't want to 1633 * recover anything. Otherwise, journal_load will do that 1634 * dirty work for us :) */ 1635 if (!dirty) { 1636 status = ocfs2_journal_wipe(osb->journal, 0); 1637 if (status < 0) { 1638 mlog_errno(status); 1639 goto finally; 1640 } 1641 } else { 1642 mlog(ML_NOTICE, "File system was not unmounted cleanly, " 1643 "recovering volume.\n"); 1644 } 1645 1646 local = ocfs2_mount_local(osb); 1647 1648 /* will play back anything left in the journal. */ 1649 ocfs2_journal_load(osb->journal, local); 1650 1651 if (dirty) { 1652 /* recover my local alloc if we didn't unmount cleanly. */ 1653 status = ocfs2_begin_local_alloc_recovery(osb, 1654 osb->slot_num, 1655 &local_alloc); 1656 if (status < 0) { 1657 mlog_errno(status); 1658 goto finally; 1659 } 1660 /* we complete the recovery process after we've marked 1661 * ourselves as mounted. */ 1662 } 1663 1664 mlog(0, "Journal loaded.\n"); 1665 1666 status = ocfs2_load_local_alloc(osb); 1667 if (status < 0) { 1668 mlog_errno(status); 1669 goto finally; 1670 } 1671 1672 if (dirty) { 1673 /* Recovery will be completed after we've mounted the 1674 * rest of the volume. */ 1675 osb->dirty = 1; 1676 osb->local_alloc_copy = local_alloc; 1677 local_alloc = NULL; 1678 } 1679 1680 /* go through each journal, trylock it and if you get the 1681 * lock, and it's marked as dirty, set the bit in the recover 1682 * map and launch a recovery thread for it. */ 1683 status = ocfs2_mark_dead_nodes(osb); 1684 if (status < 0) 1685 mlog_errno(status); 1686 1687 finally: 1688 if (local_alloc) 1689 kfree(local_alloc); 1690 1691 mlog_exit(status); 1692 return status; 1693 } 1694 1695 /* 1696 * The routine gets called from dismount or close whenever a dismount on 1697 * volume is requested and the osb open count becomes 1. 1698 * It will remove the osb from the global list and also free up all the 1699 * initialized resources and fileobject. 1700 */ 1701 static void ocfs2_delete_osb(struct ocfs2_super *osb) 1702 { 1703 mlog_entry_void(); 1704 1705 /* This function assumes that the caller has the main osb resource */ 1706 1707 if (osb->slot_info) 1708 ocfs2_free_slot_info(osb->slot_info); 1709 1710 kfree(osb->osb_orphan_wipes); 1711 /* FIXME 1712 * This belongs in journal shutdown, but because we have to 1713 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1714 * we free it here. 1715 */ 1716 kfree(osb->journal); 1717 if (osb->local_alloc_copy) 1718 kfree(osb->local_alloc_copy); 1719 kfree(osb->uuid_str); 1720 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 1721 memset(osb, 0, sizeof(struct ocfs2_super)); 1722 1723 mlog_exit_void(); 1724 } 1725 1726 /* Put OCFS2 into a readonly state, or (if the user specifies it), 1727 * panic(). We do not support continue-on-error operation. */ 1728 static void ocfs2_handle_error(struct super_block *sb) 1729 { 1730 struct ocfs2_super *osb = OCFS2_SB(sb); 1731 1732 if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) 1733 panic("OCFS2: (device %s): panic forced after error\n", 1734 sb->s_id); 1735 1736 ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS); 1737 1738 if (sb->s_flags & MS_RDONLY && 1739 (ocfs2_is_soft_readonly(osb) || 1740 ocfs2_is_hard_readonly(osb))) 1741 return; 1742 1743 printk(KERN_CRIT "File system is now read-only due to the potential " 1744 "of on-disk corruption. Please run fsck.ocfs2 once the file " 1745 "system is unmounted.\n"); 1746 sb->s_flags |= MS_RDONLY; 1747 ocfs2_set_ro_flag(osb, 0); 1748 } 1749 1750 static char error_buf[1024]; 1751 1752 void __ocfs2_error(struct super_block *sb, 1753 const char *function, 1754 const char *fmt, ...) 1755 { 1756 va_list args; 1757 1758 va_start(args, fmt); 1759 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 1760 va_end(args); 1761 1762 /* Not using mlog here because we want to show the actual 1763 * function the error came from. */ 1764 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", 1765 sb->s_id, function, error_buf); 1766 1767 ocfs2_handle_error(sb); 1768 } 1769 1770 /* Handle critical errors. This is intentionally more drastic than 1771 * ocfs2_handle_error, so we only use for things like journal errors, 1772 * etc. */ 1773 void __ocfs2_abort(struct super_block* sb, 1774 const char *function, 1775 const char *fmt, ...) 1776 { 1777 va_list args; 1778 1779 va_start(args, fmt); 1780 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 1781 va_end(args); 1782 1783 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", 1784 sb->s_id, function, error_buf); 1785 1786 /* We don't have the cluster support yet to go straight to 1787 * hard readonly in here. Until then, we want to keep 1788 * ocfs2_abort() so that we can at least mark critical 1789 * errors. 1790 * 1791 * TODO: This should abort the journal and alert other nodes 1792 * that our slot needs recovery. */ 1793 1794 /* Force a panic(). This stinks, but it's better than letting 1795 * things continue without having a proper hard readonly 1796 * here. */ 1797 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 1798 ocfs2_handle_error(sb); 1799 } 1800 1801 module_init(ocfs2_init); 1802 module_exit(ocfs2_exit); 1803