1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 #include "cache.h" 21 22 #include <linux/ceph/ceph_features.h> 23 #include <linux/ceph/decode.h> 24 #include <linux/ceph/mon_client.h> 25 #include <linux/ceph/auth.h> 26 #include <linux/ceph/debugfs.h> 27 28 /* 29 * Ceph superblock operations 30 * 31 * Handle the basics of mounting, unmounting. 32 */ 33 34 /* 35 * super ops 36 */ 37 static void ceph_put_super(struct super_block *s) 38 { 39 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 40 41 dout("put_super\n"); 42 ceph_mdsc_close_sessions(fsc->mdsc); 43 } 44 45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 46 { 47 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 48 struct ceph_monmap *monmap = fsc->client->monc.monmap; 49 struct ceph_statfs st; 50 u64 fsid; 51 int err; 52 u64 data_pool; 53 54 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 55 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 56 } else { 57 data_pool = CEPH_NOPOOL; 58 } 59 60 dout("statfs\n"); 61 err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st); 62 if (err < 0) 63 return err; 64 65 /* fill in kstatfs */ 66 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 67 68 /* 69 * express utilization in terms of large blocks to avoid 70 * overflow on 32-bit machines. 71 * 72 * NOTE: for the time being, we make bsize == frsize to humor 73 * not-yet-ancient versions of glibc that are broken. 74 * Someday, we will probably want to report a real block 75 * size... whatever that may mean for a network file system! 76 */ 77 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 78 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 79 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 80 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 81 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 82 83 buf->f_files = le64_to_cpu(st.num_objects); 84 buf->f_ffree = -1; 85 buf->f_namelen = NAME_MAX; 86 87 /* leave fsid little-endian, regardless of host endianness */ 88 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); 89 buf->f_fsid.val[0] = fsid & 0xffffffff; 90 buf->f_fsid.val[1] = fsid >> 32; 91 92 return 0; 93 } 94 95 96 static int ceph_sync_fs(struct super_block *sb, int wait) 97 { 98 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 99 100 if (!wait) { 101 dout("sync_fs (non-blocking)\n"); 102 ceph_flush_dirty_caps(fsc->mdsc); 103 dout("sync_fs (non-blocking) done\n"); 104 return 0; 105 } 106 107 dout("sync_fs (blocking)\n"); 108 ceph_osdc_sync(&fsc->client->osdc); 109 ceph_mdsc_sync(fsc->mdsc); 110 dout("sync_fs (blocking) done\n"); 111 return 0; 112 } 113 114 /* 115 * mount options 116 */ 117 enum { 118 Opt_wsize, 119 Opt_rsize, 120 Opt_rasize, 121 Opt_caps_wanted_delay_min, 122 Opt_caps_wanted_delay_max, 123 Opt_readdir_max_entries, 124 Opt_readdir_max_bytes, 125 Opt_congestion_kb, 126 Opt_last_int, 127 /* int args above */ 128 Opt_snapdirname, 129 Opt_mds_namespace, 130 Opt_fscache_uniq, 131 Opt_last_string, 132 /* string args above */ 133 Opt_dirstat, 134 Opt_nodirstat, 135 Opt_rbytes, 136 Opt_norbytes, 137 Opt_asyncreaddir, 138 Opt_noasyncreaddir, 139 Opt_dcache, 140 Opt_nodcache, 141 Opt_ino32, 142 Opt_noino32, 143 Opt_fscache, 144 Opt_nofscache, 145 Opt_poolperm, 146 Opt_nopoolperm, 147 Opt_require_active_mds, 148 Opt_norequire_active_mds, 149 #ifdef CONFIG_CEPH_FS_POSIX_ACL 150 Opt_acl, 151 #endif 152 Opt_noacl, 153 }; 154 155 static match_table_t fsopt_tokens = { 156 {Opt_wsize, "wsize=%d"}, 157 {Opt_rsize, "rsize=%d"}, 158 {Opt_rasize, "rasize=%d"}, 159 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 160 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 161 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 162 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 163 {Opt_congestion_kb, "write_congestion_kb=%d"}, 164 /* int args above */ 165 {Opt_snapdirname, "snapdirname=%s"}, 166 {Opt_mds_namespace, "mds_namespace=%s"}, 167 {Opt_fscache_uniq, "fsc=%s"}, 168 /* string args above */ 169 {Opt_dirstat, "dirstat"}, 170 {Opt_nodirstat, "nodirstat"}, 171 {Opt_rbytes, "rbytes"}, 172 {Opt_norbytes, "norbytes"}, 173 {Opt_asyncreaddir, "asyncreaddir"}, 174 {Opt_noasyncreaddir, "noasyncreaddir"}, 175 {Opt_dcache, "dcache"}, 176 {Opt_nodcache, "nodcache"}, 177 {Opt_ino32, "ino32"}, 178 {Opt_noino32, "noino32"}, 179 {Opt_fscache, "fsc"}, 180 {Opt_nofscache, "nofsc"}, 181 {Opt_poolperm, "poolperm"}, 182 {Opt_nopoolperm, "nopoolperm"}, 183 {Opt_require_active_mds, "require_active_mds"}, 184 {Opt_norequire_active_mds, "norequire_active_mds"}, 185 #ifdef CONFIG_CEPH_FS_POSIX_ACL 186 {Opt_acl, "acl"}, 187 #endif 188 {Opt_noacl, "noacl"}, 189 {-1, NULL} 190 }; 191 192 static int parse_fsopt_token(char *c, void *private) 193 { 194 struct ceph_mount_options *fsopt = private; 195 substring_t argstr[MAX_OPT_ARGS]; 196 int token, intval, ret; 197 198 token = match_token((char *)c, fsopt_tokens, argstr); 199 if (token < 0) 200 return -EINVAL; 201 202 if (token < Opt_last_int) { 203 ret = match_int(&argstr[0], &intval); 204 if (ret < 0) { 205 pr_err("bad mount option arg (not int) " 206 "at '%s'\n", c); 207 return ret; 208 } 209 dout("got int token %d val %d\n", token, intval); 210 } else if (token > Opt_last_int && token < Opt_last_string) { 211 dout("got string token %d val %s\n", token, 212 argstr[0].from); 213 } else { 214 dout("got token %d\n", token); 215 } 216 217 switch (token) { 218 case Opt_snapdirname: 219 kfree(fsopt->snapdir_name); 220 fsopt->snapdir_name = kstrndup(argstr[0].from, 221 argstr[0].to-argstr[0].from, 222 GFP_KERNEL); 223 if (!fsopt->snapdir_name) 224 return -ENOMEM; 225 break; 226 case Opt_mds_namespace: 227 fsopt->mds_namespace = kstrndup(argstr[0].from, 228 argstr[0].to-argstr[0].from, 229 GFP_KERNEL); 230 if (!fsopt->mds_namespace) 231 return -ENOMEM; 232 break; 233 case Opt_fscache_uniq: 234 fsopt->fscache_uniq = kstrndup(argstr[0].from, 235 argstr[0].to-argstr[0].from, 236 GFP_KERNEL); 237 if (!fsopt->fscache_uniq) 238 return -ENOMEM; 239 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 240 break; 241 /* misc */ 242 case Opt_wsize: 243 if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 244 return -EINVAL; 245 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 246 break; 247 case Opt_rsize: 248 if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 249 return -EINVAL; 250 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 251 break; 252 case Opt_rasize: 253 if (intval < 0) 254 return -EINVAL; 255 fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE); 256 break; 257 case Opt_caps_wanted_delay_min: 258 if (intval < 1) 259 return -EINVAL; 260 fsopt->caps_wanted_delay_min = intval; 261 break; 262 case Opt_caps_wanted_delay_max: 263 if (intval < 1) 264 return -EINVAL; 265 fsopt->caps_wanted_delay_max = intval; 266 break; 267 case Opt_readdir_max_entries: 268 if (intval < 1) 269 return -EINVAL; 270 fsopt->max_readdir = intval; 271 break; 272 case Opt_readdir_max_bytes: 273 if (intval < PAGE_SIZE && intval != 0) 274 return -EINVAL; 275 fsopt->max_readdir_bytes = intval; 276 break; 277 case Opt_congestion_kb: 278 if (intval < 1024) /* at least 1M */ 279 return -EINVAL; 280 fsopt->congestion_kb = intval; 281 break; 282 case Opt_dirstat: 283 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 284 break; 285 case Opt_nodirstat: 286 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 287 break; 288 case Opt_rbytes: 289 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 290 break; 291 case Opt_norbytes: 292 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 293 break; 294 case Opt_asyncreaddir: 295 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 296 break; 297 case Opt_noasyncreaddir: 298 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 299 break; 300 case Opt_dcache: 301 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 302 break; 303 case Opt_nodcache: 304 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 305 break; 306 case Opt_ino32: 307 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 308 break; 309 case Opt_noino32: 310 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 311 break; 312 case Opt_fscache: 313 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 314 break; 315 case Opt_nofscache: 316 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 317 break; 318 case Opt_poolperm: 319 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 320 printk ("pool perm"); 321 break; 322 case Opt_nopoolperm: 323 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 324 break; 325 case Opt_require_active_mds: 326 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 327 break; 328 case Opt_norequire_active_mds: 329 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 330 break; 331 #ifdef CONFIG_CEPH_FS_POSIX_ACL 332 case Opt_acl: 333 fsopt->sb_flags |= MS_POSIXACL; 334 break; 335 #endif 336 case Opt_noacl: 337 fsopt->sb_flags &= ~MS_POSIXACL; 338 break; 339 default: 340 BUG_ON(token); 341 } 342 return 0; 343 } 344 345 static void destroy_mount_options(struct ceph_mount_options *args) 346 { 347 dout("destroy_mount_options %p\n", args); 348 kfree(args->snapdir_name); 349 kfree(args->mds_namespace); 350 kfree(args->server_path); 351 kfree(args->fscache_uniq); 352 kfree(args); 353 } 354 355 static int strcmp_null(const char *s1, const char *s2) 356 { 357 if (!s1 && !s2) 358 return 0; 359 if (s1 && !s2) 360 return -1; 361 if (!s1 && s2) 362 return 1; 363 return strcmp(s1, s2); 364 } 365 366 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 367 struct ceph_options *new_opt, 368 struct ceph_fs_client *fsc) 369 { 370 struct ceph_mount_options *fsopt1 = new_fsopt; 371 struct ceph_mount_options *fsopt2 = fsc->mount_options; 372 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 373 int ret; 374 375 ret = memcmp(fsopt1, fsopt2, ofs); 376 if (ret) 377 return ret; 378 379 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 380 if (ret) 381 return ret; 382 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 383 if (ret) 384 return ret; 385 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 386 if (ret) 387 return ret; 388 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 389 if (ret) 390 return ret; 391 392 return ceph_compare_options(new_opt, fsc->client); 393 } 394 395 static int parse_mount_options(struct ceph_mount_options **pfsopt, 396 struct ceph_options **popt, 397 int flags, char *options, 398 const char *dev_name) 399 { 400 struct ceph_mount_options *fsopt; 401 const char *dev_name_end; 402 int err; 403 404 if (!dev_name || !*dev_name) 405 return -EINVAL; 406 407 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 408 if (!fsopt) 409 return -ENOMEM; 410 411 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 412 413 fsopt->sb_flags = flags; 414 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 415 416 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 417 fsopt->rsize = CEPH_MAX_READ_SIZE; 418 fsopt->rasize = CEPH_RASIZE_DEFAULT; 419 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 420 if (!fsopt->snapdir_name) { 421 err = -ENOMEM; 422 goto out; 423 } 424 425 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 426 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 427 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 428 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 429 fsopt->congestion_kb = default_congestion_kb(); 430 431 /* 432 * Distinguish the server list from the path in "dev_name". 433 * Internally we do not include the leading '/' in the path. 434 * 435 * "dev_name" will look like: 436 * <server_spec>[,<server_spec>...]:[<path>] 437 * where 438 * <server_spec> is <ip>[:<port>] 439 * <path> is optional, but if present must begin with '/' 440 */ 441 dev_name_end = strchr(dev_name, '/'); 442 if (dev_name_end) { 443 if (strlen(dev_name_end) > 1) { 444 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 445 if (!fsopt->server_path) { 446 err = -ENOMEM; 447 goto out; 448 } 449 } 450 } else { 451 dev_name_end = dev_name + strlen(dev_name); 452 } 453 err = -EINVAL; 454 dev_name_end--; /* back up to ':' separator */ 455 if (dev_name_end < dev_name || *dev_name_end != ':') { 456 pr_err("device name is missing path (no : separator in %s)\n", 457 dev_name); 458 goto out; 459 } 460 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 461 if (fsopt->server_path) 462 dout("server path '%s'\n", fsopt->server_path); 463 464 *popt = ceph_parse_options(options, dev_name, dev_name_end, 465 parse_fsopt_token, (void *)fsopt); 466 if (IS_ERR(*popt)) { 467 err = PTR_ERR(*popt); 468 goto out; 469 } 470 471 /* success */ 472 *pfsopt = fsopt; 473 return 0; 474 475 out: 476 destroy_mount_options(fsopt); 477 return err; 478 } 479 480 /** 481 * ceph_show_options - Show mount options in /proc/mounts 482 * @m: seq_file to write to 483 * @root: root of that (sub)tree 484 */ 485 static int ceph_show_options(struct seq_file *m, struct dentry *root) 486 { 487 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 488 struct ceph_mount_options *fsopt = fsc->mount_options; 489 size_t pos; 490 int ret; 491 492 /* a comma between MNT/MS and client options */ 493 seq_putc(m, ','); 494 pos = m->count; 495 496 ret = ceph_print_client_options(m, fsc->client); 497 if (ret) 498 return ret; 499 500 /* retract our comma if no client options */ 501 if (m->count == pos) 502 m->count--; 503 504 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 505 seq_puts(m, ",dirstat"); 506 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 507 seq_puts(m, ",rbytes"); 508 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 509 seq_puts(m, ",noasyncreaddir"); 510 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 511 seq_puts(m, ",nodcache"); 512 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 513 if (fsopt->fscache_uniq) 514 seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); 515 else 516 seq_puts(m, ",fsc"); 517 } 518 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 519 seq_puts(m, ",nopoolperm"); 520 521 #ifdef CONFIG_CEPH_FS_POSIX_ACL 522 if (fsopt->sb_flags & MS_POSIXACL) 523 seq_puts(m, ",acl"); 524 else 525 seq_puts(m, ",noacl"); 526 #endif 527 528 if (fsopt->mds_namespace) 529 seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace); 530 if (fsopt->wsize) 531 seq_printf(m, ",wsize=%d", fsopt->wsize); 532 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 533 seq_printf(m, ",rsize=%d", fsopt->rsize); 534 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 535 seq_printf(m, ",rasize=%d", fsopt->rasize); 536 if (fsopt->congestion_kb != default_congestion_kb()) 537 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 538 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 539 seq_printf(m, ",caps_wanted_delay_min=%d", 540 fsopt->caps_wanted_delay_min); 541 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 542 seq_printf(m, ",caps_wanted_delay_max=%d", 543 fsopt->caps_wanted_delay_max); 544 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 545 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 546 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 547 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 548 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 549 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 550 551 return 0; 552 } 553 554 /* 555 * handle any mon messages the standard library doesn't understand. 556 * return error if we don't either. 557 */ 558 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 559 { 560 struct ceph_fs_client *fsc = client->private; 561 int type = le16_to_cpu(msg->hdr.type); 562 563 switch (type) { 564 case CEPH_MSG_MDS_MAP: 565 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 566 return 0; 567 case CEPH_MSG_FS_MAP_USER: 568 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 569 return 0; 570 default: 571 return -1; 572 } 573 } 574 575 /* 576 * create a new fs client 577 */ 578 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 579 struct ceph_options *opt) 580 { 581 struct ceph_fs_client *fsc; 582 int page_count; 583 size_t size; 584 int err = -ENOMEM; 585 586 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 587 if (!fsc) 588 return ERR_PTR(-ENOMEM); 589 590 fsc->client = ceph_create_client(opt, fsc); 591 if (IS_ERR(fsc->client)) { 592 err = PTR_ERR(fsc->client); 593 goto fail; 594 } 595 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 596 597 if (!fsopt->mds_namespace) { 598 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 599 0, true); 600 } else { 601 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 602 0, false); 603 } 604 605 fsc->mount_options = fsopt; 606 607 fsc->sb = NULL; 608 fsc->mount_state = CEPH_MOUNT_MOUNTING; 609 610 atomic_long_set(&fsc->writeback_count, 0); 611 612 err = -ENOMEM; 613 /* 614 * The number of concurrent works can be high but they don't need 615 * to be processed in parallel, limit concurrency. 616 */ 617 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 618 if (!fsc->wb_wq) 619 goto fail_client; 620 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 621 if (!fsc->pg_inv_wq) 622 goto fail_wb_wq; 623 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 624 if (!fsc->trunc_wq) 625 goto fail_pg_inv_wq; 626 627 /* set up mempools */ 628 err = -ENOMEM; 629 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 630 size = sizeof (struct page *) * (page_count ? page_count : 1); 631 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 632 if (!fsc->wb_pagevec_pool) 633 goto fail_trunc_wq; 634 635 /* caps */ 636 fsc->min_caps = fsopt->max_readdir; 637 638 return fsc; 639 640 fail_trunc_wq: 641 destroy_workqueue(fsc->trunc_wq); 642 fail_pg_inv_wq: 643 destroy_workqueue(fsc->pg_inv_wq); 644 fail_wb_wq: 645 destroy_workqueue(fsc->wb_wq); 646 fail_client: 647 ceph_destroy_client(fsc->client); 648 fail: 649 kfree(fsc); 650 return ERR_PTR(err); 651 } 652 653 static void destroy_fs_client(struct ceph_fs_client *fsc) 654 { 655 dout("destroy_fs_client %p\n", fsc); 656 657 destroy_workqueue(fsc->wb_wq); 658 destroy_workqueue(fsc->pg_inv_wq); 659 destroy_workqueue(fsc->trunc_wq); 660 661 mempool_destroy(fsc->wb_pagevec_pool); 662 663 destroy_mount_options(fsc->mount_options); 664 665 ceph_destroy_client(fsc->client); 666 667 kfree(fsc); 668 dout("destroy_fs_client %p done\n", fsc); 669 } 670 671 /* 672 * caches 673 */ 674 struct kmem_cache *ceph_inode_cachep; 675 struct kmem_cache *ceph_cap_cachep; 676 struct kmem_cache *ceph_cap_flush_cachep; 677 struct kmem_cache *ceph_dentry_cachep; 678 struct kmem_cache *ceph_file_cachep; 679 680 static void ceph_inode_init_once(void *foo) 681 { 682 struct ceph_inode_info *ci = foo; 683 inode_init_once(&ci->vfs_inode); 684 } 685 686 static int __init init_caches(void) 687 { 688 int error = -ENOMEM; 689 690 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 691 sizeof(struct ceph_inode_info), 692 __alignof__(struct ceph_inode_info), 693 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 694 SLAB_ACCOUNT, ceph_inode_init_once); 695 if (!ceph_inode_cachep) 696 return -ENOMEM; 697 698 ceph_cap_cachep = KMEM_CACHE(ceph_cap, 699 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 700 if (!ceph_cap_cachep) 701 goto bad_cap; 702 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 703 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 704 if (!ceph_cap_flush_cachep) 705 goto bad_cap_flush; 706 707 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 708 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 709 if (!ceph_dentry_cachep) 710 goto bad_dentry; 711 712 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 713 714 if (!ceph_file_cachep) 715 goto bad_file; 716 717 if ((error = ceph_fscache_register())) 718 goto bad_file; 719 720 return 0; 721 bad_file: 722 kmem_cache_destroy(ceph_dentry_cachep); 723 bad_dentry: 724 kmem_cache_destroy(ceph_cap_flush_cachep); 725 bad_cap_flush: 726 kmem_cache_destroy(ceph_cap_cachep); 727 bad_cap: 728 kmem_cache_destroy(ceph_inode_cachep); 729 return error; 730 } 731 732 static void destroy_caches(void) 733 { 734 /* 735 * Make sure all delayed rcu free inodes are flushed before we 736 * destroy cache. 737 */ 738 rcu_barrier(); 739 740 kmem_cache_destroy(ceph_inode_cachep); 741 kmem_cache_destroy(ceph_cap_cachep); 742 kmem_cache_destroy(ceph_cap_flush_cachep); 743 kmem_cache_destroy(ceph_dentry_cachep); 744 kmem_cache_destroy(ceph_file_cachep); 745 746 ceph_fscache_unregister(); 747 } 748 749 750 /* 751 * ceph_umount_begin - initiate forced umount. Tear down down the 752 * mount, skipping steps that may hang while waiting for server(s). 753 */ 754 static void ceph_umount_begin(struct super_block *sb) 755 { 756 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 757 758 dout("ceph_umount_begin - starting forced umount\n"); 759 if (!fsc) 760 return; 761 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 762 ceph_mdsc_force_umount(fsc->mdsc); 763 return; 764 } 765 766 static const struct super_operations ceph_super_ops = { 767 .alloc_inode = ceph_alloc_inode, 768 .destroy_inode = ceph_destroy_inode, 769 .write_inode = ceph_write_inode, 770 .drop_inode = ceph_drop_inode, 771 .sync_fs = ceph_sync_fs, 772 .put_super = ceph_put_super, 773 .show_options = ceph_show_options, 774 .statfs = ceph_statfs, 775 .umount_begin = ceph_umount_begin, 776 }; 777 778 /* 779 * Bootstrap mount by opening the root directory. Note the mount 780 * @started time from caller, and time out if this takes too long. 781 */ 782 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 783 const char *path, 784 unsigned long started) 785 { 786 struct ceph_mds_client *mdsc = fsc->mdsc; 787 struct ceph_mds_request *req = NULL; 788 int err; 789 struct dentry *root; 790 791 /* open dir */ 792 dout("open_root_inode opening '%s'\n", path); 793 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 794 if (IS_ERR(req)) 795 return ERR_CAST(req); 796 req->r_path1 = kstrdup(path, GFP_NOFS); 797 if (!req->r_path1) { 798 root = ERR_PTR(-ENOMEM); 799 goto out; 800 } 801 802 req->r_ino1.ino = CEPH_INO_ROOT; 803 req->r_ino1.snap = CEPH_NOSNAP; 804 req->r_started = started; 805 req->r_timeout = fsc->client->options->mount_timeout; 806 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 807 req->r_num_caps = 2; 808 err = ceph_mdsc_do_request(mdsc, NULL, req); 809 if (err == 0) { 810 struct inode *inode = req->r_target_inode; 811 req->r_target_inode = NULL; 812 dout("open_root_inode success\n"); 813 root = d_make_root(inode); 814 if (!root) { 815 root = ERR_PTR(-ENOMEM); 816 goto out; 817 } 818 dout("open_root_inode success, root dentry is %p\n", root); 819 } else { 820 root = ERR_PTR(err); 821 } 822 out: 823 ceph_mdsc_put_request(req); 824 return root; 825 } 826 827 828 829 830 /* 831 * mount: join the ceph cluster, and open root directory. 832 */ 833 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 834 { 835 int err; 836 unsigned long started = jiffies; /* note the start time */ 837 struct dentry *root; 838 int first = 0; /* first vfsmount for this super_block */ 839 840 dout("mount start %p\n", fsc); 841 mutex_lock(&fsc->client->mount_mutex); 842 843 if (!fsc->sb->s_root) { 844 const char *path; 845 err = __ceph_open_session(fsc->client, started); 846 if (err < 0) 847 goto out; 848 849 /* setup fscache */ 850 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 851 err = ceph_fscache_register_fs(fsc); 852 if (err < 0) 853 goto out; 854 } 855 856 if (!fsc->mount_options->server_path) { 857 path = ""; 858 dout("mount opening path \\t\n"); 859 } else { 860 path = fsc->mount_options->server_path + 1; 861 dout("mount opening path %s\n", path); 862 } 863 root = open_root_dentry(fsc, path, started); 864 if (IS_ERR(root)) { 865 err = PTR_ERR(root); 866 goto out; 867 } 868 fsc->sb->s_root = dget(root); 869 first = 1; 870 871 err = ceph_fs_debugfs_init(fsc); 872 if (err < 0) 873 goto fail; 874 } else { 875 root = dget(fsc->sb->s_root); 876 } 877 878 fsc->mount_state = CEPH_MOUNT_MOUNTED; 879 dout("mount success\n"); 880 mutex_unlock(&fsc->client->mount_mutex); 881 return root; 882 883 fail: 884 if (first) { 885 dput(fsc->sb->s_root); 886 fsc->sb->s_root = NULL; 887 } 888 out: 889 mutex_unlock(&fsc->client->mount_mutex); 890 return ERR_PTR(err); 891 } 892 893 static int ceph_set_super(struct super_block *s, void *data) 894 { 895 struct ceph_fs_client *fsc = data; 896 int ret; 897 898 dout("set_super %p data %p\n", s, data); 899 900 s->s_flags = fsc->mount_options->sb_flags; 901 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ 902 903 s->s_xattr = ceph_xattr_handlers; 904 s->s_fs_info = fsc; 905 fsc->sb = s; 906 907 s->s_op = &ceph_super_ops; 908 s->s_d_op = &ceph_dentry_ops; 909 s->s_export_op = &ceph_export_ops; 910 911 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 912 913 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 914 if (ret != 0) 915 goto fail; 916 917 return ret; 918 919 fail: 920 s->s_fs_info = NULL; 921 fsc->sb = NULL; 922 return ret; 923 } 924 925 /* 926 * share superblock if same fs AND options 927 */ 928 static int ceph_compare_super(struct super_block *sb, void *data) 929 { 930 struct ceph_fs_client *new = data; 931 struct ceph_mount_options *fsopt = new->mount_options; 932 struct ceph_options *opt = new->client->options; 933 struct ceph_fs_client *other = ceph_sb_to_client(sb); 934 935 dout("ceph_compare_super %p\n", sb); 936 937 if (compare_mount_options(fsopt, opt, other)) { 938 dout("monitor(s)/mount options don't match\n"); 939 return 0; 940 } 941 if ((opt->flags & CEPH_OPT_FSID) && 942 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 943 dout("fsid doesn't match\n"); 944 return 0; 945 } 946 if (fsopt->sb_flags != other->mount_options->sb_flags) { 947 dout("flags differ\n"); 948 return 0; 949 } 950 return 1; 951 } 952 953 /* 954 * construct our own bdi so we can control readahead, etc. 955 */ 956 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 957 958 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 959 { 960 int err; 961 962 err = super_setup_bdi_name(sb, "ceph-%ld", 963 atomic_long_inc_return(&bdi_seq)); 964 if (err) 965 return err; 966 967 /* set ra_pages based on rasize mount option? */ 968 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 969 970 /* set io_pages based on max osd read size */ 971 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 972 973 return 0; 974 } 975 976 static struct dentry *ceph_mount(struct file_system_type *fs_type, 977 int flags, const char *dev_name, void *data) 978 { 979 struct super_block *sb; 980 struct ceph_fs_client *fsc; 981 struct dentry *res; 982 int err; 983 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 984 struct ceph_mount_options *fsopt = NULL; 985 struct ceph_options *opt = NULL; 986 987 dout("ceph_mount\n"); 988 989 #ifdef CONFIG_CEPH_FS_POSIX_ACL 990 flags |= MS_POSIXACL; 991 #endif 992 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 993 if (err < 0) { 994 res = ERR_PTR(err); 995 goto out_final; 996 } 997 998 /* create client (which we may/may not use) */ 999 fsc = create_fs_client(fsopt, opt); 1000 if (IS_ERR(fsc)) { 1001 res = ERR_CAST(fsc); 1002 destroy_mount_options(fsopt); 1003 ceph_destroy_options(opt); 1004 goto out_final; 1005 } 1006 1007 err = ceph_mdsc_init(fsc); 1008 if (err < 0) { 1009 res = ERR_PTR(err); 1010 goto out; 1011 } 1012 1013 if (ceph_test_opt(fsc->client, NOSHARE)) 1014 compare_super = NULL; 1015 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1016 if (IS_ERR(sb)) { 1017 res = ERR_CAST(sb); 1018 goto out; 1019 } 1020 1021 if (ceph_sb_to_client(sb) != fsc) { 1022 ceph_mdsc_destroy(fsc); 1023 destroy_fs_client(fsc); 1024 fsc = ceph_sb_to_client(sb); 1025 dout("get_sb got existing client %p\n", fsc); 1026 } else { 1027 dout("get_sb using new client %p\n", fsc); 1028 err = ceph_setup_bdi(sb, fsc); 1029 if (err < 0) { 1030 res = ERR_PTR(err); 1031 goto out_splat; 1032 } 1033 } 1034 1035 res = ceph_real_mount(fsc); 1036 if (IS_ERR(res)) 1037 goto out_splat; 1038 dout("root %p inode %p ino %llx.%llx\n", res, 1039 d_inode(res), ceph_vinop(d_inode(res))); 1040 return res; 1041 1042 out_splat: 1043 ceph_mdsc_close_sessions(fsc->mdsc); 1044 deactivate_locked_super(sb); 1045 goto out_final; 1046 1047 out: 1048 ceph_mdsc_destroy(fsc); 1049 destroy_fs_client(fsc); 1050 out_final: 1051 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1052 return res; 1053 } 1054 1055 static void ceph_kill_sb(struct super_block *s) 1056 { 1057 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1058 dev_t dev = s->s_dev; 1059 1060 dout("kill_sb %p\n", s); 1061 1062 ceph_mdsc_pre_umount(fsc->mdsc); 1063 generic_shutdown_super(s); 1064 1065 fsc->client->extra_mon_dispatch = NULL; 1066 ceph_fs_debugfs_cleanup(fsc); 1067 1068 ceph_fscache_unregister_fs(fsc); 1069 1070 ceph_mdsc_destroy(fsc); 1071 1072 destroy_fs_client(fsc); 1073 free_anon_bdev(dev); 1074 } 1075 1076 static struct file_system_type ceph_fs_type = { 1077 .owner = THIS_MODULE, 1078 .name = "ceph", 1079 .mount = ceph_mount, 1080 .kill_sb = ceph_kill_sb, 1081 .fs_flags = FS_RENAME_DOES_D_MOVE, 1082 }; 1083 MODULE_ALIAS_FS("ceph"); 1084 1085 static int __init init_ceph(void) 1086 { 1087 int ret = init_caches(); 1088 if (ret) 1089 goto out; 1090 1091 ceph_flock_init(); 1092 ceph_xattr_init(); 1093 ret = register_filesystem(&ceph_fs_type); 1094 if (ret) 1095 goto out_xattr; 1096 1097 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1098 1099 return 0; 1100 1101 out_xattr: 1102 ceph_xattr_exit(); 1103 destroy_caches(); 1104 out: 1105 return ret; 1106 } 1107 1108 static void __exit exit_ceph(void) 1109 { 1110 dout("exit_ceph\n"); 1111 unregister_filesystem(&ceph_fs_type); 1112 ceph_xattr_exit(); 1113 destroy_caches(); 1114 } 1115 1116 module_init(init_ceph); 1117 module_exit(exit_ceph); 1118 1119 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1120 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1121 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1122 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1123 MODULE_LICENSE("GPL"); 1124