1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 21 #include <linux/ceph/decode.h> 22 #include <linux/ceph/mon_client.h> 23 #include <linux/ceph/auth.h> 24 #include <linux/ceph/debugfs.h> 25 26 /* 27 * Ceph superblock operations 28 * 29 * Handle the basics of mounting, unmounting. 30 */ 31 32 /* 33 * super ops 34 */ 35 static void ceph_put_super(struct super_block *s) 36 { 37 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 38 39 dout("put_super\n"); 40 ceph_mdsc_close_sessions(fsc->mdsc); 41 42 /* 43 * ensure we release the bdi before put_anon_super releases 44 * the device name. 45 */ 46 if (s->s_bdi == &fsc->backing_dev_info) { 47 bdi_unregister(&fsc->backing_dev_info); 48 s->s_bdi = NULL; 49 } 50 51 return; 52 } 53 54 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 55 { 56 struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); 57 struct ceph_monmap *monmap = fsc->client->monc.monmap; 58 struct ceph_statfs st; 59 u64 fsid; 60 int err; 61 62 dout("statfs\n"); 63 err = ceph_monc_do_statfs(&fsc->client->monc, &st); 64 if (err < 0) 65 return err; 66 67 /* fill in kstatfs */ 68 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 69 70 /* 71 * express utilization in terms of large blocks to avoid 72 * overflow on 32-bit machines. 73 */ 74 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 75 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 76 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 77 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 78 79 buf->f_files = le64_to_cpu(st.num_objects); 80 buf->f_ffree = -1; 81 buf->f_namelen = NAME_MAX; 82 buf->f_frsize = PAGE_CACHE_SIZE; 83 84 /* leave fsid little-endian, regardless of host endianness */ 85 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); 86 buf->f_fsid.val[0] = fsid & 0xffffffff; 87 buf->f_fsid.val[1] = fsid >> 32; 88 89 return 0; 90 } 91 92 93 static int ceph_sync_fs(struct super_block *sb, int wait) 94 { 95 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 96 97 if (!wait) { 98 dout("sync_fs (non-blocking)\n"); 99 ceph_flush_dirty_caps(fsc->mdsc); 100 dout("sync_fs (non-blocking) done\n"); 101 return 0; 102 } 103 104 dout("sync_fs (blocking)\n"); 105 ceph_osdc_sync(&fsc->client->osdc); 106 ceph_mdsc_sync(fsc->mdsc); 107 dout("sync_fs (blocking) done\n"); 108 return 0; 109 } 110 111 /* 112 * mount options 113 */ 114 enum { 115 Opt_wsize, 116 Opt_rsize, 117 Opt_rasize, 118 Opt_caps_wanted_delay_min, 119 Opt_caps_wanted_delay_max, 120 Opt_cap_release_safety, 121 Opt_readdir_max_entries, 122 Opt_readdir_max_bytes, 123 Opt_congestion_kb, 124 Opt_last_int, 125 /* int args above */ 126 Opt_snapdirname, 127 Opt_last_string, 128 /* string args above */ 129 Opt_dirstat, 130 Opt_nodirstat, 131 Opt_rbytes, 132 Opt_norbytes, 133 Opt_asyncreaddir, 134 Opt_noasyncreaddir, 135 Opt_dcache, 136 Opt_nodcache, 137 Opt_ino32, 138 Opt_noino32, 139 }; 140 141 static match_table_t fsopt_tokens = { 142 {Opt_wsize, "wsize=%d"}, 143 {Opt_rsize, "rsize=%d"}, 144 {Opt_rasize, "rasize=%d"}, 145 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 146 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 147 {Opt_cap_release_safety, "cap_release_safety=%d"}, 148 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 149 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 150 {Opt_congestion_kb, "write_congestion_kb=%d"}, 151 /* int args above */ 152 {Opt_snapdirname, "snapdirname=%s"}, 153 /* string args above */ 154 {Opt_dirstat, "dirstat"}, 155 {Opt_nodirstat, "nodirstat"}, 156 {Opt_rbytes, "rbytes"}, 157 {Opt_norbytes, "norbytes"}, 158 {Opt_asyncreaddir, "asyncreaddir"}, 159 {Opt_noasyncreaddir, "noasyncreaddir"}, 160 {Opt_dcache, "dcache"}, 161 {Opt_nodcache, "nodcache"}, 162 {Opt_ino32, "ino32"}, 163 {Opt_noino32, "noino32"}, 164 {-1, NULL} 165 }; 166 167 static int parse_fsopt_token(char *c, void *private) 168 { 169 struct ceph_mount_options *fsopt = private; 170 substring_t argstr[MAX_OPT_ARGS]; 171 int token, intval, ret; 172 173 token = match_token((char *)c, fsopt_tokens, argstr); 174 if (token < 0) 175 return -EINVAL; 176 177 if (token < Opt_last_int) { 178 ret = match_int(&argstr[0], &intval); 179 if (ret < 0) { 180 pr_err("bad mount option arg (not int) " 181 "at '%s'\n", c); 182 return ret; 183 } 184 dout("got int token %d val %d\n", token, intval); 185 } else if (token > Opt_last_int && token < Opt_last_string) { 186 dout("got string token %d val %s\n", token, 187 argstr[0].from); 188 } else { 189 dout("got token %d\n", token); 190 } 191 192 switch (token) { 193 case Opt_snapdirname: 194 kfree(fsopt->snapdir_name); 195 fsopt->snapdir_name = kstrndup(argstr[0].from, 196 argstr[0].to-argstr[0].from, 197 GFP_KERNEL); 198 if (!fsopt->snapdir_name) 199 return -ENOMEM; 200 break; 201 202 /* misc */ 203 case Opt_wsize: 204 fsopt->wsize = intval; 205 break; 206 case Opt_rsize: 207 fsopt->rsize = intval; 208 break; 209 case Opt_rasize: 210 fsopt->rasize = intval; 211 break; 212 case Opt_caps_wanted_delay_min: 213 fsopt->caps_wanted_delay_min = intval; 214 break; 215 case Opt_caps_wanted_delay_max: 216 fsopt->caps_wanted_delay_max = intval; 217 break; 218 case Opt_readdir_max_entries: 219 fsopt->max_readdir = intval; 220 break; 221 case Opt_readdir_max_bytes: 222 fsopt->max_readdir_bytes = intval; 223 break; 224 case Opt_congestion_kb: 225 fsopt->congestion_kb = intval; 226 break; 227 case Opt_dirstat: 228 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 229 break; 230 case Opt_nodirstat: 231 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 232 break; 233 case Opt_rbytes: 234 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 235 break; 236 case Opt_norbytes: 237 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 238 break; 239 case Opt_asyncreaddir: 240 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 241 break; 242 case Opt_noasyncreaddir: 243 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 244 break; 245 case Opt_dcache: 246 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 247 break; 248 case Opt_nodcache: 249 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 250 break; 251 case Opt_ino32: 252 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 253 break; 254 case Opt_noino32: 255 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 256 break; 257 default: 258 BUG_ON(token); 259 } 260 return 0; 261 } 262 263 static void destroy_mount_options(struct ceph_mount_options *args) 264 { 265 dout("destroy_mount_options %p\n", args); 266 kfree(args->snapdir_name); 267 kfree(args); 268 } 269 270 static int strcmp_null(const char *s1, const char *s2) 271 { 272 if (!s1 && !s2) 273 return 0; 274 if (s1 && !s2) 275 return -1; 276 if (!s1 && s2) 277 return 1; 278 return strcmp(s1, s2); 279 } 280 281 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 282 struct ceph_options *new_opt, 283 struct ceph_fs_client *fsc) 284 { 285 struct ceph_mount_options *fsopt1 = new_fsopt; 286 struct ceph_mount_options *fsopt2 = fsc->mount_options; 287 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 288 int ret; 289 290 ret = memcmp(fsopt1, fsopt2, ofs); 291 if (ret) 292 return ret; 293 294 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 295 if (ret) 296 return ret; 297 298 return ceph_compare_options(new_opt, fsc->client); 299 } 300 301 static int parse_mount_options(struct ceph_mount_options **pfsopt, 302 struct ceph_options **popt, 303 int flags, char *options, 304 const char *dev_name, 305 const char **path) 306 { 307 struct ceph_mount_options *fsopt; 308 const char *dev_name_end; 309 int err = -ENOMEM; 310 311 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 312 if (!fsopt) 313 return -ENOMEM; 314 315 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 316 317 fsopt->sb_flags = flags; 318 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 319 320 fsopt->rsize = CEPH_RSIZE_DEFAULT; 321 fsopt->rasize = CEPH_RASIZE_DEFAULT; 322 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 323 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 324 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 325 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 326 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 327 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 328 fsopt->congestion_kb = default_congestion_kb(); 329 330 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ 331 err = -EINVAL; 332 if (!dev_name) 333 goto out; 334 *path = strstr(dev_name, ":/"); 335 if (*path == NULL) { 336 pr_err("device name is missing path (no :/ in %s)\n", 337 dev_name); 338 goto out; 339 } 340 dev_name_end = *path; 341 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 342 343 /* path on server */ 344 *path += 2; 345 dout("server path '%s'\n", *path); 346 347 *popt = ceph_parse_options(options, dev_name, dev_name_end, 348 parse_fsopt_token, (void *)fsopt); 349 if (IS_ERR(*popt)) { 350 err = PTR_ERR(*popt); 351 goto out; 352 } 353 354 /* success */ 355 *pfsopt = fsopt; 356 return 0; 357 358 out: 359 destroy_mount_options(fsopt); 360 return err; 361 } 362 363 /** 364 * ceph_show_options - Show mount options in /proc/mounts 365 * @m: seq_file to write to 366 * @root: root of that (sub)tree 367 */ 368 static int ceph_show_options(struct seq_file *m, struct dentry *root) 369 { 370 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 371 struct ceph_mount_options *fsopt = fsc->mount_options; 372 struct ceph_options *opt = fsc->client->options; 373 374 if (opt->flags & CEPH_OPT_FSID) 375 seq_printf(m, ",fsid=%pU", &opt->fsid); 376 if (opt->flags & CEPH_OPT_NOSHARE) 377 seq_puts(m, ",noshare"); 378 if (opt->flags & CEPH_OPT_NOCRC) 379 seq_puts(m, ",nocrc"); 380 381 if (opt->name) 382 seq_printf(m, ",name=%s", opt->name); 383 if (opt->key) 384 seq_puts(m, ",secret=<hidden>"); 385 386 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 387 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); 388 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 389 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); 390 if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) 391 seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); 392 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 393 seq_printf(m, ",osdkeepalivetimeout=%d", 394 opt->osd_keepalive_timeout); 395 396 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 397 seq_puts(m, ",dirstat"); 398 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) 399 seq_puts(m, ",norbytes"); 400 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 401 seq_puts(m, ",noasyncreaddir"); 402 if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) 403 seq_puts(m, ",dcache"); 404 else 405 seq_puts(m, ",nodcache"); 406 407 if (fsopt->wsize) 408 seq_printf(m, ",wsize=%d", fsopt->wsize); 409 if (fsopt->rsize != CEPH_RSIZE_DEFAULT) 410 seq_printf(m, ",rsize=%d", fsopt->rsize); 411 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 412 seq_printf(m, ",rasize=%d", fsopt->rasize); 413 if (fsopt->congestion_kb != default_congestion_kb()) 414 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 415 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 416 seq_printf(m, ",caps_wanted_delay_min=%d", 417 fsopt->caps_wanted_delay_min); 418 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 419 seq_printf(m, ",caps_wanted_delay_max=%d", 420 fsopt->caps_wanted_delay_max); 421 if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) 422 seq_printf(m, ",cap_release_safety=%d", 423 fsopt->cap_release_safety); 424 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 425 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 426 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 427 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 428 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 429 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); 430 return 0; 431 } 432 433 /* 434 * handle any mon messages the standard library doesn't understand. 435 * return error if we don't either. 436 */ 437 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 438 { 439 struct ceph_fs_client *fsc = client->private; 440 int type = le16_to_cpu(msg->hdr.type); 441 442 switch (type) { 443 case CEPH_MSG_MDS_MAP: 444 ceph_mdsc_handle_map(fsc->mdsc, msg); 445 return 0; 446 447 default: 448 return -1; 449 } 450 } 451 452 /* 453 * create a new fs client 454 */ 455 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 456 struct ceph_options *opt) 457 { 458 struct ceph_fs_client *fsc; 459 const unsigned supported_features = 460 CEPH_FEATURE_FLOCK | 461 CEPH_FEATURE_DIRLAYOUTHASH; 462 const unsigned required_features = 0; 463 int err = -ENOMEM; 464 465 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 466 if (!fsc) 467 return ERR_PTR(-ENOMEM); 468 469 fsc->client = ceph_create_client(opt, fsc, supported_features, 470 required_features); 471 if (IS_ERR(fsc->client)) { 472 err = PTR_ERR(fsc->client); 473 goto fail; 474 } 475 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 476 fsc->client->monc.want_mdsmap = 1; 477 478 fsc->mount_options = fsopt; 479 480 fsc->sb = NULL; 481 fsc->mount_state = CEPH_MOUNT_MOUNTING; 482 483 atomic_long_set(&fsc->writeback_count, 0); 484 485 err = bdi_init(&fsc->backing_dev_info); 486 if (err < 0) 487 goto fail_client; 488 489 err = -ENOMEM; 490 /* 491 * The number of concurrent works can be high but they don't need 492 * to be processed in parallel, limit concurrency. 493 */ 494 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 495 if (fsc->wb_wq == NULL) 496 goto fail_bdi; 497 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 498 if (fsc->pg_inv_wq == NULL) 499 goto fail_wb_wq; 500 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 501 if (fsc->trunc_wq == NULL) 502 goto fail_pg_inv_wq; 503 504 /* set up mempools */ 505 err = -ENOMEM; 506 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, 507 fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); 508 if (!fsc->wb_pagevec_pool) 509 goto fail_trunc_wq; 510 511 /* caps */ 512 fsc->min_caps = fsopt->max_readdir; 513 514 return fsc; 515 516 fail_trunc_wq: 517 destroy_workqueue(fsc->trunc_wq); 518 fail_pg_inv_wq: 519 destroy_workqueue(fsc->pg_inv_wq); 520 fail_wb_wq: 521 destroy_workqueue(fsc->wb_wq); 522 fail_bdi: 523 bdi_destroy(&fsc->backing_dev_info); 524 fail_client: 525 ceph_destroy_client(fsc->client); 526 fail: 527 kfree(fsc); 528 return ERR_PTR(err); 529 } 530 531 static void destroy_fs_client(struct ceph_fs_client *fsc) 532 { 533 dout("destroy_fs_client %p\n", fsc); 534 535 destroy_workqueue(fsc->wb_wq); 536 destroy_workqueue(fsc->pg_inv_wq); 537 destroy_workqueue(fsc->trunc_wq); 538 539 bdi_destroy(&fsc->backing_dev_info); 540 541 mempool_destroy(fsc->wb_pagevec_pool); 542 543 destroy_mount_options(fsc->mount_options); 544 545 ceph_fs_debugfs_cleanup(fsc); 546 547 ceph_destroy_client(fsc->client); 548 549 kfree(fsc); 550 dout("destroy_fs_client %p done\n", fsc); 551 } 552 553 /* 554 * caches 555 */ 556 struct kmem_cache *ceph_inode_cachep; 557 struct kmem_cache *ceph_cap_cachep; 558 struct kmem_cache *ceph_dentry_cachep; 559 struct kmem_cache *ceph_file_cachep; 560 561 static void ceph_inode_init_once(void *foo) 562 { 563 struct ceph_inode_info *ci = foo; 564 inode_init_once(&ci->vfs_inode); 565 } 566 567 static int __init init_caches(void) 568 { 569 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 570 sizeof(struct ceph_inode_info), 571 __alignof__(struct ceph_inode_info), 572 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), 573 ceph_inode_init_once); 574 if (ceph_inode_cachep == NULL) 575 return -ENOMEM; 576 577 ceph_cap_cachep = KMEM_CACHE(ceph_cap, 578 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 579 if (ceph_cap_cachep == NULL) 580 goto bad_cap; 581 582 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 583 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 584 if (ceph_dentry_cachep == NULL) 585 goto bad_dentry; 586 587 ceph_file_cachep = KMEM_CACHE(ceph_file_info, 588 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 589 if (ceph_file_cachep == NULL) 590 goto bad_file; 591 592 return 0; 593 594 bad_file: 595 kmem_cache_destroy(ceph_dentry_cachep); 596 bad_dentry: 597 kmem_cache_destroy(ceph_cap_cachep); 598 bad_cap: 599 kmem_cache_destroy(ceph_inode_cachep); 600 return -ENOMEM; 601 } 602 603 static void destroy_caches(void) 604 { 605 kmem_cache_destroy(ceph_inode_cachep); 606 kmem_cache_destroy(ceph_cap_cachep); 607 kmem_cache_destroy(ceph_dentry_cachep); 608 kmem_cache_destroy(ceph_file_cachep); 609 } 610 611 612 /* 613 * ceph_umount_begin - initiate forced umount. Tear down down the 614 * mount, skipping steps that may hang while waiting for server(s). 615 */ 616 static void ceph_umount_begin(struct super_block *sb) 617 { 618 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 619 620 dout("ceph_umount_begin - starting forced umount\n"); 621 if (!fsc) 622 return; 623 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 624 return; 625 } 626 627 static const struct super_operations ceph_super_ops = { 628 .alloc_inode = ceph_alloc_inode, 629 .destroy_inode = ceph_destroy_inode, 630 .write_inode = ceph_write_inode, 631 .sync_fs = ceph_sync_fs, 632 .put_super = ceph_put_super, 633 .show_options = ceph_show_options, 634 .statfs = ceph_statfs, 635 .umount_begin = ceph_umount_begin, 636 }; 637 638 /* 639 * Bootstrap mount by opening the root directory. Note the mount 640 * @started time from caller, and time out if this takes too long. 641 */ 642 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 643 const char *path, 644 unsigned long started) 645 { 646 struct ceph_mds_client *mdsc = fsc->mdsc; 647 struct ceph_mds_request *req = NULL; 648 int err; 649 struct dentry *root; 650 651 /* open dir */ 652 dout("open_root_inode opening '%s'\n", path); 653 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 654 if (IS_ERR(req)) 655 return ERR_CAST(req); 656 req->r_path1 = kstrdup(path, GFP_NOFS); 657 req->r_ino1.ino = CEPH_INO_ROOT; 658 req->r_ino1.snap = CEPH_NOSNAP; 659 req->r_started = started; 660 req->r_timeout = fsc->client->options->mount_timeout * HZ; 661 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 662 req->r_num_caps = 2; 663 err = ceph_mdsc_do_request(mdsc, NULL, req); 664 if (err == 0) { 665 struct inode *inode = req->r_target_inode; 666 req->r_target_inode = NULL; 667 dout("open_root_inode success\n"); 668 if (ceph_ino(inode) == CEPH_INO_ROOT && 669 fsc->sb->s_root == NULL) { 670 root = d_make_root(inode); 671 if (!root) { 672 root = ERR_PTR(-ENOMEM); 673 goto out; 674 } 675 } else { 676 root = d_obtain_alias(inode); 677 } 678 ceph_init_dentry(root); 679 dout("open_root_inode success, root dentry is %p\n", root); 680 } else { 681 root = ERR_PTR(err); 682 } 683 out: 684 ceph_mdsc_put_request(req); 685 return root; 686 } 687 688 689 690 691 /* 692 * mount: join the ceph cluster, and open root directory. 693 */ 694 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 695 const char *path) 696 { 697 int err; 698 unsigned long started = jiffies; /* note the start time */ 699 struct dentry *root; 700 int first = 0; /* first vfsmount for this super_block */ 701 702 dout("mount start\n"); 703 mutex_lock(&fsc->client->mount_mutex); 704 705 err = __ceph_open_session(fsc->client, started); 706 if (err < 0) 707 goto out; 708 709 dout("mount opening root\n"); 710 root = open_root_dentry(fsc, "", started); 711 if (IS_ERR(root)) { 712 err = PTR_ERR(root); 713 goto out; 714 } 715 if (fsc->sb->s_root) { 716 dput(root); 717 } else { 718 fsc->sb->s_root = root; 719 first = 1; 720 721 err = ceph_fs_debugfs_init(fsc); 722 if (err < 0) 723 goto fail; 724 } 725 726 if (path[0] == 0) { 727 dget(root); 728 } else { 729 dout("mount opening base mountpoint\n"); 730 root = open_root_dentry(fsc, path, started); 731 if (IS_ERR(root)) { 732 err = PTR_ERR(root); 733 goto fail; 734 } 735 } 736 737 fsc->mount_state = CEPH_MOUNT_MOUNTED; 738 dout("mount success\n"); 739 mutex_unlock(&fsc->client->mount_mutex); 740 return root; 741 742 out: 743 mutex_unlock(&fsc->client->mount_mutex); 744 return ERR_PTR(err); 745 746 fail: 747 if (first) { 748 dput(fsc->sb->s_root); 749 fsc->sb->s_root = NULL; 750 } 751 goto out; 752 } 753 754 static int ceph_set_super(struct super_block *s, void *data) 755 { 756 struct ceph_fs_client *fsc = data; 757 int ret; 758 759 dout("set_super %p data %p\n", s, data); 760 761 s->s_flags = fsc->mount_options->sb_flags; 762 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ 763 764 s->s_fs_info = fsc; 765 fsc->sb = s; 766 767 s->s_op = &ceph_super_ops; 768 s->s_export_op = &ceph_export_ops; 769 770 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 771 772 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 773 if (ret != 0) 774 goto fail; 775 776 return ret; 777 778 fail: 779 s->s_fs_info = NULL; 780 fsc->sb = NULL; 781 return ret; 782 } 783 784 /* 785 * share superblock if same fs AND options 786 */ 787 static int ceph_compare_super(struct super_block *sb, void *data) 788 { 789 struct ceph_fs_client *new = data; 790 struct ceph_mount_options *fsopt = new->mount_options; 791 struct ceph_options *opt = new->client->options; 792 struct ceph_fs_client *other = ceph_sb_to_client(sb); 793 794 dout("ceph_compare_super %p\n", sb); 795 796 if (compare_mount_options(fsopt, opt, other)) { 797 dout("monitor(s)/mount options don't match\n"); 798 return 0; 799 } 800 if ((opt->flags & CEPH_OPT_FSID) && 801 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 802 dout("fsid doesn't match\n"); 803 return 0; 804 } 805 if (fsopt->sb_flags != other->mount_options->sb_flags) { 806 dout("flags differ\n"); 807 return 0; 808 } 809 return 1; 810 } 811 812 /* 813 * construct our own bdi so we can control readahead, etc. 814 */ 815 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 816 817 static int ceph_register_bdi(struct super_block *sb, 818 struct ceph_fs_client *fsc) 819 { 820 int err; 821 822 /* set ra_pages based on rasize mount option? */ 823 if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE) 824 fsc->backing_dev_info.ra_pages = 825 (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1) 826 >> PAGE_SHIFT; 827 else 828 fsc->backing_dev_info.ra_pages = 829 default_backing_dev_info.ra_pages; 830 831 err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", 832 atomic_long_inc_return(&bdi_seq)); 833 if (!err) 834 sb->s_bdi = &fsc->backing_dev_info; 835 return err; 836 } 837 838 static struct dentry *ceph_mount(struct file_system_type *fs_type, 839 int flags, const char *dev_name, void *data) 840 { 841 struct super_block *sb; 842 struct ceph_fs_client *fsc; 843 struct dentry *res; 844 int err; 845 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 846 const char *path = NULL; 847 struct ceph_mount_options *fsopt = NULL; 848 struct ceph_options *opt = NULL; 849 850 dout("ceph_mount\n"); 851 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); 852 if (err < 0) { 853 res = ERR_PTR(err); 854 goto out_final; 855 } 856 857 /* create client (which we may/may not use) */ 858 fsc = create_fs_client(fsopt, opt); 859 if (IS_ERR(fsc)) { 860 res = ERR_CAST(fsc); 861 destroy_mount_options(fsopt); 862 ceph_destroy_options(opt); 863 goto out_final; 864 } 865 866 err = ceph_mdsc_init(fsc); 867 if (err < 0) { 868 res = ERR_PTR(err); 869 goto out; 870 } 871 872 if (ceph_test_opt(fsc->client, NOSHARE)) 873 compare_super = NULL; 874 sb = sget(fs_type, compare_super, ceph_set_super, fsc); 875 if (IS_ERR(sb)) { 876 res = ERR_CAST(sb); 877 goto out; 878 } 879 880 if (ceph_sb_to_client(sb) != fsc) { 881 ceph_mdsc_destroy(fsc); 882 destroy_fs_client(fsc); 883 fsc = ceph_sb_to_client(sb); 884 dout("get_sb got existing client %p\n", fsc); 885 } else { 886 dout("get_sb using new client %p\n", fsc); 887 err = ceph_register_bdi(sb, fsc); 888 if (err < 0) { 889 res = ERR_PTR(err); 890 goto out_splat; 891 } 892 } 893 894 res = ceph_real_mount(fsc, path); 895 if (IS_ERR(res)) 896 goto out_splat; 897 dout("root %p inode %p ino %llx.%llx\n", res, 898 res->d_inode, ceph_vinop(res->d_inode)); 899 return res; 900 901 out_splat: 902 ceph_mdsc_close_sessions(fsc->mdsc); 903 deactivate_locked_super(sb); 904 goto out_final; 905 906 out: 907 ceph_mdsc_destroy(fsc); 908 destroy_fs_client(fsc); 909 out_final: 910 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 911 return res; 912 } 913 914 static void ceph_kill_sb(struct super_block *s) 915 { 916 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 917 dout("kill_sb %p\n", s); 918 ceph_mdsc_pre_umount(fsc->mdsc); 919 kill_anon_super(s); /* will call put_super after sb is r/o */ 920 ceph_mdsc_destroy(fsc); 921 destroy_fs_client(fsc); 922 } 923 924 static struct file_system_type ceph_fs_type = { 925 .owner = THIS_MODULE, 926 .name = "ceph", 927 .mount = ceph_mount, 928 .kill_sb = ceph_kill_sb, 929 .fs_flags = FS_RENAME_DOES_D_MOVE, 930 }; 931 932 #define _STRINGIFY(x) #x 933 #define STRINGIFY(x) _STRINGIFY(x) 934 935 static int __init init_ceph(void) 936 { 937 int ret = init_caches(); 938 if (ret) 939 goto out; 940 941 ceph_xattr_init(); 942 ret = register_filesystem(&ceph_fs_type); 943 if (ret) 944 goto out_icache; 945 946 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 947 948 return 0; 949 950 out_icache: 951 ceph_xattr_exit(); 952 destroy_caches(); 953 out: 954 return ret; 955 } 956 957 static void __exit exit_ceph(void) 958 { 959 dout("exit_ceph\n"); 960 unregister_filesystem(&ceph_fs_type); 961 ceph_xattr_exit(); 962 destroy_caches(); 963 } 964 965 module_init(init_ceph); 966 module_exit(exit_ceph); 967 968 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 969 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 970 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 971 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 972 MODULE_LICENSE("GPL"); 973