1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * super.c 5 * 6 * load/unload driver, mount/dismount volumes 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/module.h> 27 #include <linux/fs.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/highmem.h> 31 #include <linux/utsname.h> 32 #include <linux/init.h> 33 #include <linux/random.h> 34 #include <linux/statfs.h> 35 #include <linux/moduleparam.h> 36 #include <linux/blkdev.h> 37 #include <linux/socket.h> 38 #include <linux/inet.h> 39 #include <linux/parser.h> 40 #include <linux/crc32.h> 41 #include <linux/debugfs.h> 42 #include <linux/mount.h> 43 44 #include <cluster/nodemanager.h> 45 46 #define MLOG_MASK_PREFIX ML_SUPER 47 #include <cluster/masklog.h> 48 49 #include "ocfs2.h" 50 51 /* this should be the only file to include a version 1 header */ 52 #include "ocfs1_fs_compat.h" 53 54 #include "alloc.h" 55 #include "dlmglue.h" 56 #include "export.h" 57 #include "extent_map.h" 58 #include "heartbeat.h" 59 #include "inode.h" 60 #include "journal.h" 61 #include "localalloc.h" 62 #include "namei.h" 63 #include "slot_map.h" 64 #include "super.h" 65 #include "sysfile.h" 66 #include "uptodate.h" 67 #include "ver.h" 68 69 #include "buffer_head_io.h" 70 71 static struct kmem_cache *ocfs2_inode_cachep = NULL; 72 73 /* OCFS2 needs to schedule several differnt types of work which 74 * require cluster locking, disk I/O, recovery waits, etc. Since these 75 * types of work tend to be heavy we avoid using the kernel events 76 * workqueue and schedule on our own. */ 77 struct workqueue_struct *ocfs2_wq = NULL; 78 79 static struct dentry *ocfs2_debugfs_root = NULL; 80 81 MODULE_AUTHOR("Oracle"); 82 MODULE_LICENSE("GPL"); 83 84 struct mount_options 85 { 86 unsigned long commit_interval; 87 unsigned long mount_opt; 88 unsigned int atime_quantum; 89 signed short slot; 90 unsigned int localalloc_opt; 91 }; 92 93 static int ocfs2_parse_options(struct super_block *sb, char *options, 94 struct mount_options *mopt, 95 int is_remount); 96 static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 97 static void ocfs2_put_super(struct super_block *sb); 98 static int ocfs2_mount_volume(struct super_block *sb); 99 static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 100 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); 101 static int ocfs2_initialize_mem_caches(void); 102 static void ocfs2_free_mem_caches(void); 103 static void ocfs2_delete_osb(struct ocfs2_super *osb); 104 105 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf); 106 107 static int ocfs2_sync_fs(struct super_block *sb, int wait); 108 109 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 110 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 111 static void ocfs2_release_system_inodes(struct ocfs2_super *osb); 112 static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); 113 static int ocfs2_check_volume(struct ocfs2_super *osb); 114 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 115 struct buffer_head *bh, 116 u32 sectsize); 117 static int ocfs2_initialize_super(struct super_block *sb, 118 struct buffer_head *bh, 119 int sector_size); 120 static int ocfs2_get_sector(struct super_block *sb, 121 struct buffer_head **bh, 122 int block, 123 int sect_size); 124 static void ocfs2_write_super(struct super_block *sb); 125 static struct inode *ocfs2_alloc_inode(struct super_block *sb); 126 static void ocfs2_destroy_inode(struct inode *inode); 127 128 static const struct super_operations ocfs2_sops = { 129 .statfs = ocfs2_statfs, 130 .alloc_inode = ocfs2_alloc_inode, 131 .destroy_inode = ocfs2_destroy_inode, 132 .drop_inode = ocfs2_drop_inode, 133 .clear_inode = ocfs2_clear_inode, 134 .delete_inode = ocfs2_delete_inode, 135 .sync_fs = ocfs2_sync_fs, 136 .write_super = ocfs2_write_super, 137 .put_super = ocfs2_put_super, 138 .remount_fs = ocfs2_remount, 139 .show_options = ocfs2_show_options, 140 }; 141 142 enum { 143 Opt_barrier, 144 Opt_err_panic, 145 Opt_err_ro, 146 Opt_intr, 147 Opt_nointr, 148 Opt_hb_none, 149 Opt_hb_local, 150 Opt_data_ordered, 151 Opt_data_writeback, 152 Opt_atime_quantum, 153 Opt_slot, 154 Opt_commit, 155 Opt_localalloc, 156 Opt_localflocks, 157 Opt_err, 158 }; 159 160 static match_table_t tokens = { 161 {Opt_barrier, "barrier=%u"}, 162 {Opt_err_panic, "errors=panic"}, 163 {Opt_err_ro, "errors=remount-ro"}, 164 {Opt_intr, "intr"}, 165 {Opt_nointr, "nointr"}, 166 {Opt_hb_none, OCFS2_HB_NONE}, 167 {Opt_hb_local, OCFS2_HB_LOCAL}, 168 {Opt_data_ordered, "data=ordered"}, 169 {Opt_data_writeback, "data=writeback"}, 170 {Opt_atime_quantum, "atime_quantum=%u"}, 171 {Opt_slot, "preferred_slot=%u"}, 172 {Opt_commit, "commit=%u"}, 173 {Opt_localalloc, "localalloc=%d"}, 174 {Opt_localflocks, "localflocks"}, 175 {Opt_err, NULL} 176 }; 177 178 /* 179 * write_super and sync_fs ripped right out of ext3. 180 */ 181 static void ocfs2_write_super(struct super_block *sb) 182 { 183 if (mutex_trylock(&sb->s_lock) != 0) 184 BUG(); 185 sb->s_dirt = 0; 186 } 187 188 static int ocfs2_sync_fs(struct super_block *sb, int wait) 189 { 190 int status; 191 tid_t target; 192 struct ocfs2_super *osb = OCFS2_SB(sb); 193 194 sb->s_dirt = 0; 195 196 if (ocfs2_is_hard_readonly(osb)) 197 return -EROFS; 198 199 if (wait) { 200 status = ocfs2_flush_truncate_log(osb); 201 if (status < 0) 202 mlog_errno(status); 203 } else { 204 ocfs2_schedule_truncate_log_flush(osb, 0); 205 } 206 207 if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { 208 if (wait) 209 log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 210 target); 211 } 212 return 0; 213 } 214 215 static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) 216 { 217 struct inode *new = NULL; 218 int status = 0; 219 int i; 220 221 mlog_entry_void(); 222 223 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 224 if (IS_ERR(new)) { 225 status = PTR_ERR(new); 226 mlog_errno(status); 227 goto bail; 228 } 229 osb->root_inode = new; 230 231 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 232 if (IS_ERR(new)) { 233 status = PTR_ERR(new); 234 mlog_errno(status); 235 goto bail; 236 } 237 osb->sys_root_inode = new; 238 239 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; 240 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { 241 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 242 if (!new) { 243 ocfs2_release_system_inodes(osb); 244 status = -EINVAL; 245 mlog_errno(status); 246 /* FIXME: Should ERROR_RO_FS */ 247 mlog(ML_ERROR, "Unable to load system inode %d, " 248 "possibly corrupt fs?", i); 249 goto bail; 250 } 251 // the array now has one ref, so drop this one 252 iput(new); 253 } 254 255 bail: 256 mlog_exit(status); 257 return status; 258 } 259 260 static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) 261 { 262 struct inode *new = NULL; 263 int status = 0; 264 int i; 265 266 mlog_entry_void(); 267 268 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 269 i < NUM_SYSTEM_INODES; 270 i++) { 271 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 272 if (!new) { 273 ocfs2_release_system_inodes(osb); 274 status = -EINVAL; 275 mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n", 276 status, i, osb->slot_num); 277 goto bail; 278 } 279 /* the array now has one ref, so drop this one */ 280 iput(new); 281 } 282 283 bail: 284 mlog_exit(status); 285 return status; 286 } 287 288 static void ocfs2_release_system_inodes(struct ocfs2_super *osb) 289 { 290 int i; 291 struct inode *inode; 292 293 mlog_entry_void(); 294 295 for (i = 0; i < NUM_SYSTEM_INODES; i++) { 296 inode = osb->system_inodes[i]; 297 if (inode) { 298 iput(inode); 299 osb->system_inodes[i] = NULL; 300 } 301 } 302 303 inode = osb->sys_root_inode; 304 if (inode) { 305 iput(inode); 306 osb->sys_root_inode = NULL; 307 } 308 309 inode = osb->root_inode; 310 if (inode) { 311 iput(inode); 312 osb->root_inode = NULL; 313 } 314 315 mlog_exit(0); 316 } 317 318 /* We're allocating fs objects, use GFP_NOFS */ 319 static struct inode *ocfs2_alloc_inode(struct super_block *sb) 320 { 321 struct ocfs2_inode_info *oi; 322 323 oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS); 324 if (!oi) 325 return NULL; 326 327 return &oi->vfs_inode; 328 } 329 330 static void ocfs2_destroy_inode(struct inode *inode) 331 { 332 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 333 } 334 335 static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 336 unsigned int cbits) 337 { 338 unsigned int bytes = 1 << cbits; 339 unsigned int trim = bytes; 340 unsigned int bitshift = 32; 341 342 /* 343 * i_size and all block offsets in ocfs2 are always 64 bits 344 * wide. i_clusters is 32 bits, in cluster-sized units. So on 345 * 64 bit platforms, cluster size will be the limiting factor. 346 */ 347 348 #if BITS_PER_LONG == 32 349 # if defined(CONFIG_LBD) 350 BUILD_BUG_ON(sizeof(sector_t) != 8); 351 /* 352 * We might be limited by page cache size. 353 */ 354 if (bytes > PAGE_CACHE_SIZE) { 355 bytes = PAGE_CACHE_SIZE; 356 trim = 1; 357 /* 358 * Shift by 31 here so that we don't get larger than 359 * MAX_LFS_FILESIZE 360 */ 361 bitshift = 31; 362 } 363 # else 364 /* 365 * We are limited by the size of sector_t. Use block size, as 366 * that's what we expose to the VFS. 367 */ 368 bytes = 1 << bbits; 369 trim = 1; 370 bitshift = 31; 371 # endif 372 #endif 373 374 /* 375 * Trim by a whole cluster when we can actually approach the 376 * on-disk limits. Otherwise we can overflow i_clusters when 377 * an extent start is at the max offset. 378 */ 379 return (((unsigned long long)bytes) << bitshift) - trim; 380 } 381 382 static int ocfs2_remount(struct super_block *sb, int *flags, char *data) 383 { 384 int incompat_features; 385 int ret = 0; 386 struct mount_options parsed_options; 387 struct ocfs2_super *osb = OCFS2_SB(sb); 388 389 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 390 ret = -EINVAL; 391 goto out; 392 } 393 394 if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != 395 (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 396 ret = -EINVAL; 397 mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); 398 goto out; 399 } 400 401 if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != 402 (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { 403 ret = -EINVAL; 404 mlog(ML_ERROR, "Cannot change data mode on remount\n"); 405 goto out; 406 } 407 408 /* We're going to/from readonly mode. */ 409 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 410 /* Lock here so the check of HARD_RO and the potential 411 * setting of SOFT_RO is atomic. */ 412 spin_lock(&osb->osb_lock); 413 if (osb->osb_flags & OCFS2_OSB_HARD_RO) { 414 mlog(ML_ERROR, "Remount on readonly device is forbidden.\n"); 415 ret = -EROFS; 416 goto unlock_osb; 417 } 418 419 if (*flags & MS_RDONLY) { 420 mlog(0, "Going to ro mode.\n"); 421 sb->s_flags |= MS_RDONLY; 422 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 423 } else { 424 mlog(0, "Making ro filesystem writeable.\n"); 425 426 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { 427 mlog(ML_ERROR, "Cannot remount RDWR " 428 "filesystem due to previous errors.\n"); 429 ret = -EROFS; 430 goto unlock_osb; 431 } 432 incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP); 433 if (incompat_features) { 434 mlog(ML_ERROR, "Cannot remount RDWR because " 435 "of unsupported optional features " 436 "(%x).\n", incompat_features); 437 ret = -EINVAL; 438 goto unlock_osb; 439 } 440 sb->s_flags &= ~MS_RDONLY; 441 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; 442 } 443 unlock_osb: 444 spin_unlock(&osb->osb_lock); 445 } 446 447 if (!ret) { 448 /* Only save off the new mount options in case of a successful 449 * remount. */ 450 osb->s_mount_opt = parsed_options.mount_opt; 451 osb->s_atime_quantum = parsed_options.atime_quantum; 452 osb->preferred_slot = parsed_options.slot; 453 if (parsed_options.commit_interval) 454 osb->osb_commit_interval = parsed_options.commit_interval; 455 456 if (!ocfs2_is_hard_readonly(osb)) 457 ocfs2_set_journal_params(osb); 458 } 459 out: 460 return ret; 461 } 462 463 static int ocfs2_sb_probe(struct super_block *sb, 464 struct buffer_head **bh, 465 int *sector_size) 466 { 467 int status, tmpstat; 468 struct ocfs1_vol_disk_hdr *hdr; 469 struct ocfs2_dinode *di; 470 int blksize; 471 472 *bh = NULL; 473 474 /* may be > 512 */ 475 *sector_size = bdev_hardsect_size(sb->s_bdev); 476 if (*sector_size > OCFS2_MAX_BLOCKSIZE) { 477 mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n", 478 *sector_size, OCFS2_MAX_BLOCKSIZE); 479 status = -EINVAL; 480 goto bail; 481 } 482 483 /* Can this really happen? */ 484 if (*sector_size < OCFS2_MIN_BLOCKSIZE) 485 *sector_size = OCFS2_MIN_BLOCKSIZE; 486 487 /* check block zero for old format */ 488 status = ocfs2_get_sector(sb, bh, 0, *sector_size); 489 if (status < 0) { 490 mlog_errno(status); 491 goto bail; 492 } 493 hdr = (struct ocfs1_vol_disk_hdr *) (*bh)->b_data; 494 if (hdr->major_version == OCFS1_MAJOR_VERSION) { 495 mlog(ML_ERROR, "incompatible version: %u.%u\n", 496 hdr->major_version, hdr->minor_version); 497 status = -EINVAL; 498 } 499 if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE, 500 strlen(OCFS1_VOLUME_SIGNATURE)) == 0) { 501 mlog(ML_ERROR, "incompatible volume signature: %8s\n", 502 hdr->signature); 503 status = -EINVAL; 504 } 505 brelse(*bh); 506 *bh = NULL; 507 if (status < 0) { 508 mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be " 509 "upgraded before mounting with ocfs v2\n"); 510 goto bail; 511 } 512 513 /* 514 * Now check at magic offset for 512, 1024, 2048, 4096 515 * blocksizes. 4096 is the maximum blocksize because it is 516 * the minimum clustersize. 517 */ 518 status = -EINVAL; 519 for (blksize = *sector_size; 520 blksize <= OCFS2_MAX_BLOCKSIZE; 521 blksize <<= 1) { 522 tmpstat = ocfs2_get_sector(sb, bh, 523 OCFS2_SUPER_BLOCK_BLKNO, 524 blksize); 525 if (tmpstat < 0) { 526 status = tmpstat; 527 mlog_errno(status); 528 goto bail; 529 } 530 di = (struct ocfs2_dinode *) (*bh)->b_data; 531 status = ocfs2_verify_volume(di, *bh, blksize); 532 if (status >= 0) 533 goto bail; 534 brelse(*bh); 535 *bh = NULL; 536 if (status != -EAGAIN) 537 break; 538 } 539 540 bail: 541 return status; 542 } 543 544 static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) 545 { 546 if (ocfs2_mount_local(osb)) { 547 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 548 mlog(ML_ERROR, "Cannot heartbeat on a locally " 549 "mounted device.\n"); 550 return -EINVAL; 551 } 552 } 553 554 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 555 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { 556 mlog(ML_ERROR, "Heartbeat has to be started to mount " 557 "a read-write clustered device.\n"); 558 return -EINVAL; 559 } 560 } 561 562 return 0; 563 } 564 565 static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 566 { 567 struct dentry *root; 568 int status, sector_size; 569 struct mount_options parsed_options; 570 struct inode *inode = NULL; 571 struct ocfs2_super *osb = NULL; 572 struct buffer_head *bh = NULL; 573 char nodestr[8]; 574 575 mlog_entry("%p, %p, %i", sb, data, silent); 576 577 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { 578 status = -EINVAL; 579 goto read_super_error; 580 } 581 582 /* for now we only have one cluster/node, make sure we see it 583 * in the heartbeat universe */ 584 if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { 585 if (!o2hb_check_local_node_heartbeating()) { 586 status = -EINVAL; 587 goto read_super_error; 588 } 589 } 590 591 /* probe for superblock */ 592 status = ocfs2_sb_probe(sb, &bh, §or_size); 593 if (status < 0) { 594 mlog(ML_ERROR, "superblock probe failed!\n"); 595 goto read_super_error; 596 } 597 598 status = ocfs2_initialize_super(sb, bh, sector_size); 599 osb = OCFS2_SB(sb); 600 if (status < 0) { 601 mlog_errno(status); 602 goto read_super_error; 603 } 604 brelse(bh); 605 bh = NULL; 606 osb->s_mount_opt = parsed_options.mount_opt; 607 osb->s_atime_quantum = parsed_options.atime_quantum; 608 osb->preferred_slot = parsed_options.slot; 609 osb->osb_commit_interval = parsed_options.commit_interval; 610 osb->local_alloc_size = parsed_options.localalloc_opt; 611 612 sb->s_magic = OCFS2_SUPER_MAGIC; 613 614 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 615 * heartbeat=none */ 616 if (bdev_read_only(sb->s_bdev)) { 617 if (!(sb->s_flags & MS_RDONLY)) { 618 status = -EACCES; 619 mlog(ML_ERROR, "Readonly device detected but readonly " 620 "mount was not specified.\n"); 621 goto read_super_error; 622 } 623 624 /* You should not be able to start a local heartbeat 625 * on a readonly device. */ 626 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 627 status = -EROFS; 628 mlog(ML_ERROR, "Local heartbeat specified on readonly " 629 "device.\n"); 630 goto read_super_error; 631 } 632 633 status = ocfs2_check_journals_nolocks(osb); 634 if (status < 0) { 635 if (status == -EROFS) 636 mlog(ML_ERROR, "Recovery required on readonly " 637 "file system, but write access is " 638 "unavailable.\n"); 639 else 640 mlog_errno(status); 641 goto read_super_error; 642 } 643 644 ocfs2_set_ro_flag(osb, 1); 645 646 printk(KERN_NOTICE "Readonly device detected. No cluster " 647 "services will be utilized for this mount. Recovery " 648 "will be skipped.\n"); 649 } 650 651 if (!ocfs2_is_hard_readonly(osb)) { 652 if (sb->s_flags & MS_RDONLY) 653 ocfs2_set_ro_flag(osb, 0); 654 } 655 656 status = ocfs2_verify_heartbeat(osb); 657 if (status < 0) { 658 mlog_errno(status); 659 goto read_super_error; 660 } 661 662 osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, 663 ocfs2_debugfs_root); 664 if (!osb->osb_debug_root) { 665 status = -EINVAL; 666 mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); 667 goto read_super_error; 668 } 669 670 status = ocfs2_mount_volume(sb); 671 if (osb->root_inode) 672 inode = igrab(osb->root_inode); 673 674 if (status < 0) 675 goto read_super_error; 676 677 if (!inode) { 678 status = -EIO; 679 mlog_errno(status); 680 goto read_super_error; 681 } 682 683 root = d_alloc_root(inode); 684 if (!root) { 685 status = -ENOMEM; 686 mlog_errno(status); 687 goto read_super_error; 688 } 689 690 sb->s_root = root; 691 692 ocfs2_complete_mount_recovery(osb); 693 694 if (ocfs2_mount_local(osb)) 695 snprintf(nodestr, sizeof(nodestr), "local"); 696 else 697 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 698 699 printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " 700 "with %s data mode.\n", 701 osb->dev_str, nodestr, osb->slot_num, 702 osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : 703 "ordered"); 704 705 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 706 wake_up(&osb->osb_mount_event); 707 708 mlog_exit(status); 709 return status; 710 711 read_super_error: 712 if (bh != NULL) 713 brelse(bh); 714 715 if (inode) 716 iput(inode); 717 718 if (osb) { 719 atomic_set(&osb->vol_state, VOLUME_DISABLED); 720 wake_up(&osb->osb_mount_event); 721 ocfs2_dismount_volume(sb, 1); 722 } 723 724 mlog_exit(status); 725 return status; 726 } 727 728 static int ocfs2_get_sb(struct file_system_type *fs_type, 729 int flags, 730 const char *dev_name, 731 void *data, 732 struct vfsmount *mnt) 733 { 734 return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, 735 mnt); 736 } 737 738 static struct file_system_type ocfs2_fs_type = { 739 .owner = THIS_MODULE, 740 .name = "ocfs2", 741 .get_sb = ocfs2_get_sb, /* is this called when we mount 742 * the fs? */ 743 .kill_sb = kill_block_super, /* set to the generic one 744 * right now, but do we 745 * need to change that? */ 746 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 747 .next = NULL 748 }; 749 750 static int ocfs2_parse_options(struct super_block *sb, 751 char *options, 752 struct mount_options *mopt, 753 int is_remount) 754 { 755 int status; 756 char *p; 757 758 mlog_entry("remount: %d, options: \"%s\"\n", is_remount, 759 options ? options : "(none)"); 760 761 mopt->commit_interval = 0; 762 mopt->mount_opt = 0; 763 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 764 mopt->slot = OCFS2_INVALID_SLOT; 765 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 766 767 if (!options) { 768 status = 1; 769 goto bail; 770 } 771 772 while ((p = strsep(&options, ",")) != NULL) { 773 int token, option; 774 substring_t args[MAX_OPT_ARGS]; 775 776 if (!*p) 777 continue; 778 779 token = match_token(p, tokens, args); 780 switch (token) { 781 case Opt_hb_local: 782 mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; 783 break; 784 case Opt_hb_none: 785 mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; 786 break; 787 case Opt_barrier: 788 if (match_int(&args[0], &option)) { 789 status = 0; 790 goto bail; 791 } 792 if (option) 793 mopt->mount_opt |= OCFS2_MOUNT_BARRIER; 794 else 795 mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; 796 break; 797 case Opt_intr: 798 mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; 799 break; 800 case Opt_nointr: 801 mopt->mount_opt |= OCFS2_MOUNT_NOINTR; 802 break; 803 case Opt_err_panic: 804 mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 805 break; 806 case Opt_err_ro: 807 mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; 808 break; 809 case Opt_data_ordered: 810 mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; 811 break; 812 case Opt_data_writeback: 813 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; 814 break; 815 case Opt_atime_quantum: 816 if (match_int(&args[0], &option)) { 817 status = 0; 818 goto bail; 819 } 820 if (option >= 0) 821 mopt->atime_quantum = option; 822 break; 823 case Opt_slot: 824 option = 0; 825 if (match_int(&args[0], &option)) { 826 status = 0; 827 goto bail; 828 } 829 if (option) 830 mopt->slot = (s16)option; 831 break; 832 case Opt_commit: 833 option = 0; 834 if (match_int(&args[0], &option)) { 835 status = 0; 836 goto bail; 837 } 838 if (option < 0) 839 return 0; 840 if (option == 0) 841 option = JBD_DEFAULT_MAX_COMMIT_AGE; 842 mopt->commit_interval = HZ * option; 843 break; 844 case Opt_localalloc: 845 option = 0; 846 if (match_int(&args[0], &option)) { 847 status = 0; 848 goto bail; 849 } 850 if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) 851 mopt->localalloc_opt = option; 852 break; 853 case Opt_localflocks: 854 /* 855 * Changing this during remount could race 856 * flock() requests, or "unbalance" existing 857 * ones (e.g., a lock is taken in one mode but 858 * dropped in the other). If users care enough 859 * to flip locking modes during remount, we 860 * could add a "local" flag to individual 861 * flock structures for proper tracking of 862 * state. 863 */ 864 if (!is_remount) 865 mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; 866 break; 867 default: 868 mlog(ML_ERROR, 869 "Unrecognized mount option \"%s\" " 870 "or missing value\n", p); 871 status = 0; 872 goto bail; 873 } 874 } 875 876 status = 1; 877 878 bail: 879 mlog_exit(status); 880 return status; 881 } 882 883 static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 884 { 885 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 886 unsigned long opts = osb->s_mount_opt; 887 888 if (opts & OCFS2_MOUNT_HB_LOCAL) 889 seq_printf(s, ",_netdev,heartbeat=local"); 890 else 891 seq_printf(s, ",heartbeat=none"); 892 893 if (opts & OCFS2_MOUNT_NOINTR) 894 seq_printf(s, ",nointr"); 895 896 if (opts & OCFS2_MOUNT_DATA_WRITEBACK) 897 seq_printf(s, ",data=writeback"); 898 else 899 seq_printf(s, ",data=ordered"); 900 901 if (opts & OCFS2_MOUNT_BARRIER) 902 seq_printf(s, ",barrier=1"); 903 904 if (opts & OCFS2_MOUNT_ERRORS_PANIC) 905 seq_printf(s, ",errors=panic"); 906 else 907 seq_printf(s, ",errors=remount-ro"); 908 909 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 910 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 911 912 if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) 913 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); 914 915 if (osb->osb_commit_interval) 916 seq_printf(s, ",commit=%u", 917 (unsigned) (osb->osb_commit_interval / HZ)); 918 919 if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 920 seq_printf(s, ",localalloc=%d", osb->local_alloc_size); 921 922 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 923 seq_printf(s, ",localflocks,"); 924 925 return 0; 926 } 927 928 static int __init ocfs2_init(void) 929 { 930 int status; 931 932 mlog_entry_void(); 933 934 ocfs2_print_version(); 935 936 status = init_ocfs2_uptodate_cache(); 937 if (status < 0) { 938 mlog_errno(status); 939 goto leave; 940 } 941 942 status = ocfs2_initialize_mem_caches(); 943 if (status < 0) { 944 mlog_errno(status); 945 goto leave; 946 } 947 948 ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); 949 if (!ocfs2_wq) { 950 status = -ENOMEM; 951 goto leave; 952 } 953 954 ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); 955 if (!ocfs2_debugfs_root) { 956 status = -EFAULT; 957 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 958 } 959 960 leave: 961 if (status < 0) { 962 ocfs2_free_mem_caches(); 963 exit_ocfs2_uptodate_cache(); 964 } 965 966 mlog_exit(status); 967 968 if (status >= 0) { 969 return register_filesystem(&ocfs2_fs_type); 970 } else 971 return -1; 972 } 973 974 static void __exit ocfs2_exit(void) 975 { 976 mlog_entry_void(); 977 978 if (ocfs2_wq) { 979 flush_workqueue(ocfs2_wq); 980 destroy_workqueue(ocfs2_wq); 981 } 982 983 debugfs_remove(ocfs2_debugfs_root); 984 985 ocfs2_free_mem_caches(); 986 987 unregister_filesystem(&ocfs2_fs_type); 988 989 exit_ocfs2_uptodate_cache(); 990 991 mlog_exit_void(); 992 } 993 994 static void ocfs2_put_super(struct super_block *sb) 995 { 996 mlog_entry("(0x%p)\n", sb); 997 998 ocfs2_sync_blockdev(sb); 999 ocfs2_dismount_volume(sb, 0); 1000 1001 mlog_exit_void(); 1002 } 1003 1004 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 1005 { 1006 struct ocfs2_super *osb; 1007 u32 numbits, freebits; 1008 int status; 1009 struct ocfs2_dinode *bm_lock; 1010 struct buffer_head *bh = NULL; 1011 struct inode *inode = NULL; 1012 1013 mlog_entry("(%p, %p)\n", dentry->d_sb, buf); 1014 1015 osb = OCFS2_SB(dentry->d_sb); 1016 1017 inode = ocfs2_get_system_file_inode(osb, 1018 GLOBAL_BITMAP_SYSTEM_INODE, 1019 OCFS2_INVALID_SLOT); 1020 if (!inode) { 1021 mlog(ML_ERROR, "failed to get bitmap inode\n"); 1022 status = -EIO; 1023 goto bail; 1024 } 1025 1026 status = ocfs2_inode_lock(inode, &bh, 0); 1027 if (status < 0) { 1028 mlog_errno(status); 1029 goto bail; 1030 } 1031 1032 bm_lock = (struct ocfs2_dinode *) bh->b_data; 1033 1034 numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total); 1035 freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used); 1036 1037 buf->f_type = OCFS2_SUPER_MAGIC; 1038 buf->f_bsize = dentry->d_sb->s_blocksize; 1039 buf->f_namelen = OCFS2_MAX_FILENAME_LEN; 1040 buf->f_blocks = ((sector_t) numbits) * 1041 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1042 buf->f_bfree = ((sector_t) freebits) * 1043 (osb->s_clustersize >> osb->sb->s_blocksize_bits); 1044 buf->f_bavail = buf->f_bfree; 1045 buf->f_files = numbits; 1046 buf->f_ffree = freebits; 1047 1048 brelse(bh); 1049 1050 ocfs2_inode_unlock(inode, 0); 1051 status = 0; 1052 bail: 1053 if (inode) 1054 iput(inode); 1055 1056 mlog_exit(status); 1057 1058 return status; 1059 } 1060 1061 static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) 1062 { 1063 struct ocfs2_inode_info *oi = data; 1064 1065 oi->ip_flags = 0; 1066 oi->ip_open_count = 0; 1067 spin_lock_init(&oi->ip_lock); 1068 ocfs2_extent_map_init(&oi->vfs_inode); 1069 INIT_LIST_HEAD(&oi->ip_io_markers); 1070 oi->ip_created_trans = 0; 1071 oi->ip_last_trans = 0; 1072 oi->ip_dir_start_lookup = 0; 1073 1074 init_rwsem(&oi->ip_alloc_sem); 1075 mutex_init(&oi->ip_io_mutex); 1076 1077 oi->ip_blkno = 0ULL; 1078 oi->ip_clusters = 0; 1079 1080 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1081 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1082 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1083 1084 ocfs2_metadata_cache_init(&oi->vfs_inode); 1085 1086 inode_init_once(&oi->vfs_inode); 1087 } 1088 1089 static int ocfs2_initialize_mem_caches(void) 1090 { 1091 ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache", 1092 sizeof(struct ocfs2_inode_info), 1093 0, 1094 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 1095 SLAB_MEM_SPREAD), 1096 ocfs2_inode_init_once); 1097 if (!ocfs2_inode_cachep) 1098 return -ENOMEM; 1099 1100 return 0; 1101 } 1102 1103 static void ocfs2_free_mem_caches(void) 1104 { 1105 if (ocfs2_inode_cachep) 1106 kmem_cache_destroy(ocfs2_inode_cachep); 1107 1108 ocfs2_inode_cachep = NULL; 1109 } 1110 1111 static int ocfs2_get_sector(struct super_block *sb, 1112 struct buffer_head **bh, 1113 int block, 1114 int sect_size) 1115 { 1116 if (!sb_set_blocksize(sb, sect_size)) { 1117 mlog(ML_ERROR, "unable to set blocksize\n"); 1118 return -EIO; 1119 } 1120 1121 *bh = sb_getblk(sb, block); 1122 if (!*bh) { 1123 mlog_errno(-EIO); 1124 return -EIO; 1125 } 1126 lock_buffer(*bh); 1127 if (!buffer_dirty(*bh)) 1128 clear_buffer_uptodate(*bh); 1129 unlock_buffer(*bh); 1130 ll_rw_block(READ, 1, bh); 1131 wait_on_buffer(*bh); 1132 return 0; 1133 } 1134 1135 /* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ 1136 static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) 1137 { 1138 int status; 1139 1140 /* XXX hold a ref on the node while mounte? easy enough, if 1141 * desirable. */ 1142 if (ocfs2_mount_local(osb)) 1143 osb->node_num = 0; 1144 else 1145 osb->node_num = o2nm_this_node(); 1146 1147 if (osb->node_num == O2NM_MAX_NODES) { 1148 mlog(ML_ERROR, "could not find this host's node number\n"); 1149 status = -ENOENT; 1150 goto bail; 1151 } 1152 1153 mlog(0, "I am node %d\n", osb->node_num); 1154 1155 status = 0; 1156 bail: 1157 return status; 1158 } 1159 1160 static int ocfs2_mount_volume(struct super_block *sb) 1161 { 1162 int status = 0; 1163 int unlock_super = 0; 1164 struct ocfs2_super *osb = OCFS2_SB(sb); 1165 1166 mlog_entry_void(); 1167 1168 if (ocfs2_is_hard_readonly(osb)) 1169 goto leave; 1170 1171 status = ocfs2_fill_local_node_info(osb); 1172 if (status < 0) { 1173 mlog_errno(status); 1174 goto leave; 1175 } 1176 1177 status = ocfs2_dlm_init(osb); 1178 if (status < 0) { 1179 mlog_errno(status); 1180 goto leave; 1181 } 1182 1183 status = ocfs2_super_lock(osb, 1); 1184 if (status < 0) { 1185 mlog_errno(status); 1186 goto leave; 1187 } 1188 unlock_super = 1; 1189 1190 /* This will load up the node map and add ourselves to it. */ 1191 status = ocfs2_find_slot(osb); 1192 if (status < 0) { 1193 mlog_errno(status); 1194 goto leave; 1195 } 1196 1197 /* load all node-local system inodes */ 1198 status = ocfs2_init_local_system_inodes(osb); 1199 if (status < 0) { 1200 mlog_errno(status); 1201 goto leave; 1202 } 1203 1204 status = ocfs2_check_volume(osb); 1205 if (status < 0) { 1206 mlog_errno(status); 1207 goto leave; 1208 } 1209 1210 status = ocfs2_truncate_log_init(osb); 1211 if (status < 0) { 1212 mlog_errno(status); 1213 goto leave; 1214 } 1215 1216 if (ocfs2_mount_local(osb)) 1217 goto leave; 1218 1219 leave: 1220 if (unlock_super) 1221 ocfs2_super_unlock(osb, 1); 1222 1223 mlog_exit(status); 1224 return status; 1225 } 1226 1227 /* we can't grab the goofy sem lock from inside wait_event, so we use 1228 * memory barriers to make sure that we'll see the null task before 1229 * being woken up */ 1230 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) 1231 { 1232 mb(); 1233 return osb->recovery_thread_task != NULL; 1234 } 1235 1236 static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) 1237 { 1238 int tmp; 1239 struct ocfs2_super *osb = NULL; 1240 char nodestr[8]; 1241 1242 mlog_entry("(0x%p)\n", sb); 1243 1244 BUG_ON(!sb); 1245 osb = OCFS2_SB(sb); 1246 BUG_ON(!osb); 1247 1248 ocfs2_shutdown_local_alloc(osb); 1249 1250 ocfs2_truncate_log_shutdown(osb); 1251 1252 /* disable any new recovery threads and wait for any currently 1253 * running ones to exit. Do this before setting the vol_state. */ 1254 mutex_lock(&osb->recovery_lock); 1255 osb->disable_recovery = 1; 1256 mutex_unlock(&osb->recovery_lock); 1257 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 1258 1259 /* At this point, we know that no more recovery threads can be 1260 * launched, so wait for any recovery completion work to 1261 * complete. */ 1262 flush_workqueue(ocfs2_wq); 1263 1264 ocfs2_journal_shutdown(osb); 1265 1266 ocfs2_sync_blockdev(sb); 1267 1268 /* No dlm means we've failed during mount, so skip all the 1269 * steps which depended on that to complete. */ 1270 if (osb->dlm) { 1271 tmp = ocfs2_super_lock(osb, 1); 1272 if (tmp < 0) { 1273 mlog_errno(tmp); 1274 return; 1275 } 1276 } 1277 1278 if (osb->slot_num != OCFS2_INVALID_SLOT) 1279 ocfs2_put_slot(osb); 1280 1281 if (osb->dlm) 1282 ocfs2_super_unlock(osb, 1); 1283 1284 ocfs2_release_system_inodes(osb); 1285 1286 if (osb->dlm) 1287 ocfs2_dlm_shutdown(osb); 1288 1289 debugfs_remove(osb->osb_debug_root); 1290 1291 if (!mnt_err) 1292 ocfs2_stop_heartbeat(osb); 1293 1294 atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); 1295 1296 if (ocfs2_mount_local(osb)) 1297 snprintf(nodestr, sizeof(nodestr), "local"); 1298 else 1299 snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); 1300 1301 printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", 1302 osb->dev_str, nodestr); 1303 1304 ocfs2_delete_osb(osb); 1305 kfree(osb); 1306 sb->s_dev = 0; 1307 sb->s_fs_info = NULL; 1308 } 1309 1310 static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uuid, 1311 unsigned uuid_bytes) 1312 { 1313 int i, ret; 1314 char *ptr; 1315 1316 BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN); 1317 1318 osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL); 1319 if (osb->uuid_str == NULL) 1320 return -ENOMEM; 1321 1322 for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { 1323 /* print with null */ 1324 ret = snprintf(ptr, 3, "%02X", uuid[i]); 1325 if (ret != 2) /* drop super cleans up */ 1326 return -EINVAL; 1327 /* then only advance past the last char */ 1328 ptr += 2; 1329 } 1330 1331 return 0; 1332 } 1333 1334 static int ocfs2_initialize_super(struct super_block *sb, 1335 struct buffer_head *bh, 1336 int sector_size) 1337 { 1338 int status; 1339 int i, cbits, bbits; 1340 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1341 struct inode *inode = NULL; 1342 struct ocfs2_journal *journal; 1343 __le32 uuid_net_key; 1344 struct ocfs2_super *osb; 1345 1346 mlog_entry_void(); 1347 1348 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); 1349 if (!osb) { 1350 status = -ENOMEM; 1351 mlog_errno(status); 1352 goto bail; 1353 } 1354 1355 sb->s_fs_info = osb; 1356 sb->s_op = &ocfs2_sops; 1357 sb->s_export_op = &ocfs2_export_ops; 1358 osb->osb_locking_proto = ocfs2_locking_protocol; 1359 sb->s_time_gran = 1; 1360 sb->s_flags |= MS_NOATIME; 1361 /* this is needed to support O_LARGEFILE */ 1362 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1363 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 1364 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); 1365 1366 osb->sb = sb; 1367 /* Save off for ocfs2_rw_direct */ 1368 osb->s_sectsize_bits = blksize_bits(sector_size); 1369 BUG_ON(!osb->s_sectsize_bits); 1370 1371 init_waitqueue_head(&osb->recovery_event); 1372 spin_lock_init(&osb->dc_task_lock); 1373 init_waitqueue_head(&osb->dc_event); 1374 osb->dc_work_sequence = 0; 1375 osb->dc_wake_sequence = 0; 1376 INIT_LIST_HEAD(&osb->blocked_lock_list); 1377 osb->blocked_lock_count = 0; 1378 spin_lock_init(&osb->osb_lock); 1379 1380 atomic_set(&osb->alloc_stats.moves, 0); 1381 atomic_set(&osb->alloc_stats.local_data, 0); 1382 atomic_set(&osb->alloc_stats.bitmap_data, 0); 1383 atomic_set(&osb->alloc_stats.bg_allocs, 0); 1384 atomic_set(&osb->alloc_stats.bg_extends, 0); 1385 1386 ocfs2_init_node_maps(osb); 1387 1388 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 1389 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1390 1391 mutex_init(&osb->recovery_lock); 1392 1393 osb->disable_recovery = 0; 1394 osb->recovery_thread_task = NULL; 1395 1396 init_waitqueue_head(&osb->checkpoint_event); 1397 atomic_set(&osb->needs_checkpoint, 0); 1398 1399 osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1400 1401 osb->node_num = O2NM_INVALID_NODE_NUM; 1402 osb->slot_num = OCFS2_INVALID_SLOT; 1403 1404 osb->local_alloc_state = OCFS2_LA_UNUSED; 1405 osb->local_alloc_bh = NULL; 1406 1407 ocfs2_setup_hb_callbacks(osb); 1408 1409 init_waitqueue_head(&osb->osb_mount_event); 1410 1411 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 1412 if (!osb->vol_label) { 1413 mlog(ML_ERROR, "unable to alloc vol label\n"); 1414 status = -ENOMEM; 1415 goto bail; 1416 } 1417 1418 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); 1419 if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { 1420 mlog(ML_ERROR, "Invalid number of node slots (%u)\n", 1421 osb->max_slots); 1422 status = -EINVAL; 1423 goto bail; 1424 } 1425 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1426 1427 init_waitqueue_head(&osb->osb_wipe_event); 1428 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1429 sizeof(*osb->osb_orphan_wipes), 1430 GFP_KERNEL); 1431 if (!osb->osb_orphan_wipes) { 1432 status = -ENOMEM; 1433 mlog_errno(status); 1434 goto bail; 1435 } 1436 1437 osb->s_feature_compat = 1438 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 1439 osb->s_feature_ro_compat = 1440 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat); 1441 osb->s_feature_incompat = 1442 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat); 1443 1444 if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) { 1445 mlog(ML_ERROR, "couldn't mount because of unsupported " 1446 "optional features (%x).\n", i); 1447 status = -EINVAL; 1448 goto bail; 1449 } 1450 if (!(osb->sb->s_flags & MS_RDONLY) && 1451 (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) { 1452 mlog(ML_ERROR, "couldn't mount RDWR because of " 1453 "unsupported optional features (%x).\n", i); 1454 status = -EINVAL; 1455 goto bail; 1456 } 1457 1458 get_random_bytes(&osb->s_next_generation, sizeof(u32)); 1459 1460 /* FIXME 1461 * This should be done in ocfs2_journal_init(), but unknown 1462 * ordering issues will cause the filesystem to crash. 1463 * If anyone wants to figure out what part of the code 1464 * refers to osb->journal before ocfs2_journal_init() is run, 1465 * be my guest. 1466 */ 1467 /* initialize our journal structure */ 1468 1469 journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); 1470 if (!journal) { 1471 mlog(ML_ERROR, "unable to alloc journal\n"); 1472 status = -ENOMEM; 1473 goto bail; 1474 } 1475 osb->journal = journal; 1476 journal->j_osb = osb; 1477 1478 atomic_set(&journal->j_num_trans, 0); 1479 init_rwsem(&journal->j_trans_barrier); 1480 init_waitqueue_head(&journal->j_checkpointed); 1481 spin_lock_init(&journal->j_lock); 1482 journal->j_trans_id = (unsigned long) 1; 1483 INIT_LIST_HEAD(&journal->j_la_cleanups); 1484 INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); 1485 journal->j_state = OCFS2_JOURNAL_FREE; 1486 1487 /* get some pseudo constants for clustersize bits */ 1488 osb->s_clustersize_bits = 1489 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 1490 osb->s_clustersize = 1 << osb->s_clustersize_bits; 1491 mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits); 1492 1493 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || 1494 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { 1495 mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n", 1496 osb->s_clustersize); 1497 status = -EINVAL; 1498 goto bail; 1499 } 1500 1501 if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) 1502 > (u32)~0UL) { 1503 mlog(ML_ERROR, "Volume might try to write to blocks beyond " 1504 "what jbd can address in 32 bits.\n"); 1505 status = -EINVAL; 1506 goto bail; 1507 } 1508 1509 if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, 1510 sizeof(di->id2.i_super.s_uuid))) { 1511 mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); 1512 status = -ENOMEM; 1513 goto bail; 1514 } 1515 1516 memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); 1517 1518 strncpy(osb->vol_label, di->id2.i_super.s_label, 63); 1519 osb->vol_label[63] = '\0'; 1520 osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); 1521 osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); 1522 osb->first_cluster_group_blkno = 1523 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 1524 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 1525 mlog(0, "vol_label: %s\n", osb->vol_label); 1526 mlog(0, "uuid: %s\n", osb->uuid_str); 1527 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 1528 (unsigned long long)osb->root_blkno, 1529 (unsigned long long)osb->system_dir_blkno); 1530 1531 osb->osb_dlm_debug = ocfs2_new_dlm_debug(); 1532 if (!osb->osb_dlm_debug) { 1533 status = -ENOMEM; 1534 mlog_errno(status); 1535 goto bail; 1536 } 1537 1538 atomic_set(&osb->vol_state, VOLUME_INIT); 1539 1540 /* load root, system_dir, and all global system inodes */ 1541 status = ocfs2_init_global_system_inodes(osb); 1542 if (status < 0) { 1543 mlog_errno(status); 1544 goto bail; 1545 } 1546 1547 /* 1548 * global bitmap 1549 */ 1550 inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, 1551 OCFS2_INVALID_SLOT); 1552 if (!inode) { 1553 status = -EINVAL; 1554 mlog_errno(status); 1555 goto bail; 1556 } 1557 1558 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 1559 iput(inode); 1560 1561 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 1562 1563 status = ocfs2_init_slot_info(osb); 1564 if (status < 0) { 1565 mlog_errno(status); 1566 goto bail; 1567 } 1568 1569 bail: 1570 mlog_exit(status); 1571 return status; 1572 } 1573 1574 /* 1575 * will return: -EAGAIN if it is ok to keep searching for superblocks 1576 * -EINVAL if there is a bad superblock 1577 * 0 on success 1578 */ 1579 static int ocfs2_verify_volume(struct ocfs2_dinode *di, 1580 struct buffer_head *bh, 1581 u32 blksz) 1582 { 1583 int status = -EAGAIN; 1584 1585 mlog_entry_void(); 1586 1587 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 1588 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 1589 status = -EINVAL; 1590 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { 1591 mlog(ML_ERROR, "found superblock with incorrect block " 1592 "size: found %u, should be %u\n", 1593 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits), 1594 blksz); 1595 } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != 1596 OCFS2_MAJOR_REV_LEVEL || 1597 le16_to_cpu(di->id2.i_super.s_minor_rev_level) != 1598 OCFS2_MINOR_REV_LEVEL) { 1599 mlog(ML_ERROR, "found superblock with bad version: " 1600 "found %u.%u, should be %u.%u\n", 1601 le16_to_cpu(di->id2.i_super.s_major_rev_level), 1602 le16_to_cpu(di->id2.i_super.s_minor_rev_level), 1603 OCFS2_MAJOR_REV_LEVEL, 1604 OCFS2_MINOR_REV_LEVEL); 1605 } else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) { 1606 mlog(ML_ERROR, "bad block number on superblock: " 1607 "found %llu, should be %llu\n", 1608 (unsigned long long)le64_to_cpu(di->i_blkno), 1609 (unsigned long long)bh->b_blocknr); 1610 } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || 1611 le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { 1612 mlog(ML_ERROR, "bad cluster size found: %u\n", 1613 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)); 1614 } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { 1615 mlog(ML_ERROR, "bad root_blkno: 0\n"); 1616 } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { 1617 mlog(ML_ERROR, "bad system_dir_blkno: 0\n"); 1618 } else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) { 1619 mlog(ML_ERROR, 1620 "Superblock slots found greater than file system " 1621 "maximum: found %u, max %u\n", 1622 le16_to_cpu(di->id2.i_super.s_max_slots), 1623 OCFS2_MAX_SLOTS); 1624 } else { 1625 /* found it! */ 1626 status = 0; 1627 } 1628 } 1629 1630 mlog_exit(status); 1631 return status; 1632 } 1633 1634 static int ocfs2_check_volume(struct ocfs2_super *osb) 1635 { 1636 int status; 1637 int dirty; 1638 int local; 1639 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1640 * recover 1641 * ourselves. */ 1642 1643 mlog_entry_void(); 1644 1645 /* Init our journal object. */ 1646 status = ocfs2_journal_init(osb->journal, &dirty); 1647 if (status < 0) { 1648 mlog(ML_ERROR, "Could not initialize journal!\n"); 1649 goto finally; 1650 } 1651 1652 /* If the journal was unmounted cleanly then we don't want to 1653 * recover anything. Otherwise, journal_load will do that 1654 * dirty work for us :) */ 1655 if (!dirty) { 1656 status = ocfs2_journal_wipe(osb->journal, 0); 1657 if (status < 0) { 1658 mlog_errno(status); 1659 goto finally; 1660 } 1661 } else { 1662 mlog(ML_NOTICE, "File system was not unmounted cleanly, " 1663 "recovering volume.\n"); 1664 } 1665 1666 local = ocfs2_mount_local(osb); 1667 1668 /* will play back anything left in the journal. */ 1669 ocfs2_journal_load(osb->journal, local); 1670 1671 if (dirty) { 1672 /* recover my local alloc if we didn't unmount cleanly. */ 1673 status = ocfs2_begin_local_alloc_recovery(osb, 1674 osb->slot_num, 1675 &local_alloc); 1676 if (status < 0) { 1677 mlog_errno(status); 1678 goto finally; 1679 } 1680 /* we complete the recovery process after we've marked 1681 * ourselves as mounted. */ 1682 } 1683 1684 mlog(0, "Journal loaded.\n"); 1685 1686 status = ocfs2_load_local_alloc(osb); 1687 if (status < 0) { 1688 mlog_errno(status); 1689 goto finally; 1690 } 1691 1692 if (dirty) { 1693 /* Recovery will be completed after we've mounted the 1694 * rest of the volume. */ 1695 osb->dirty = 1; 1696 osb->local_alloc_copy = local_alloc; 1697 local_alloc = NULL; 1698 } 1699 1700 /* go through each journal, trylock it and if you get the 1701 * lock, and it's marked as dirty, set the bit in the recover 1702 * map and launch a recovery thread for it. */ 1703 status = ocfs2_mark_dead_nodes(osb); 1704 if (status < 0) 1705 mlog_errno(status); 1706 1707 finally: 1708 if (local_alloc) 1709 kfree(local_alloc); 1710 1711 mlog_exit(status); 1712 return status; 1713 } 1714 1715 /* 1716 * The routine gets called from dismount or close whenever a dismount on 1717 * volume is requested and the osb open count becomes 1. 1718 * It will remove the osb from the global list and also free up all the 1719 * initialized resources and fileobject. 1720 */ 1721 static void ocfs2_delete_osb(struct ocfs2_super *osb) 1722 { 1723 mlog_entry_void(); 1724 1725 /* This function assumes that the caller has the main osb resource */ 1726 1727 ocfs2_free_slot_info(osb); 1728 1729 kfree(osb->osb_orphan_wipes); 1730 /* FIXME 1731 * This belongs in journal shutdown, but because we have to 1732 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1733 * we free it here. 1734 */ 1735 kfree(osb->journal); 1736 if (osb->local_alloc_copy) 1737 kfree(osb->local_alloc_copy); 1738 kfree(osb->uuid_str); 1739 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 1740 memset(osb, 0, sizeof(struct ocfs2_super)); 1741 1742 mlog_exit_void(); 1743 } 1744 1745 /* Put OCFS2 into a readonly state, or (if the user specifies it), 1746 * panic(). We do not support continue-on-error operation. */ 1747 static void ocfs2_handle_error(struct super_block *sb) 1748 { 1749 struct ocfs2_super *osb = OCFS2_SB(sb); 1750 1751 if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) 1752 panic("OCFS2: (device %s): panic forced after error\n", 1753 sb->s_id); 1754 1755 ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS); 1756 1757 if (sb->s_flags & MS_RDONLY && 1758 (ocfs2_is_soft_readonly(osb) || 1759 ocfs2_is_hard_readonly(osb))) 1760 return; 1761 1762 printk(KERN_CRIT "File system is now read-only due to the potential " 1763 "of on-disk corruption. Please run fsck.ocfs2 once the file " 1764 "system is unmounted.\n"); 1765 sb->s_flags |= MS_RDONLY; 1766 ocfs2_set_ro_flag(osb, 0); 1767 } 1768 1769 static char error_buf[1024]; 1770 1771 void __ocfs2_error(struct super_block *sb, 1772 const char *function, 1773 const char *fmt, ...) 1774 { 1775 va_list args; 1776 1777 va_start(args, fmt); 1778 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 1779 va_end(args); 1780 1781 /* Not using mlog here because we want to show the actual 1782 * function the error came from. */ 1783 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", 1784 sb->s_id, function, error_buf); 1785 1786 ocfs2_handle_error(sb); 1787 } 1788 1789 /* Handle critical errors. This is intentionally more drastic than 1790 * ocfs2_handle_error, so we only use for things like journal errors, 1791 * etc. */ 1792 void __ocfs2_abort(struct super_block* sb, 1793 const char *function, 1794 const char *fmt, ...) 1795 { 1796 va_list args; 1797 1798 va_start(args, fmt); 1799 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 1800 va_end(args); 1801 1802 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", 1803 sb->s_id, function, error_buf); 1804 1805 /* We don't have the cluster support yet to go straight to 1806 * hard readonly in here. Until then, we want to keep 1807 * ocfs2_abort() so that we can at least mark critical 1808 * errors. 1809 * 1810 * TODO: This should abort the journal and alert other nodes 1811 * that our slot needs recovery. */ 1812 1813 /* Force a panic(). This stinks, but it's better than letting 1814 * things continue without having a proper hard readonly 1815 * here. */ 1816 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 1817 ocfs2_handle_error(sb); 1818 } 1819 1820 module_init(ocfs2_init); 1821 module_exit(ocfs2_exit); 1822