#include <linux/ceph/ceph_debug.h>

#include <linux/backing-dev.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"

#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>

/*
 * Ceph superblock operations
 *
 * Handle the basics of mounting, unmounting.
 */

/*
 * super ops
 */
static void ceph_put_super(struct super_block *s)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(s);

	dout("put_super\n");
	ceph_mdsc_close_sessions(fsc->mdsc);
}

static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
	struct ceph_monmap *monmap = fsc->client->monc.monmap;
	struct ceph_statfs st;
	u64 fsid;
	int err;
	u64 data_pool;

	if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
		data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
	} else {
		data_pool = CEPH_NOPOOL;
	}

	dout("statfs\n");
	err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st);
	if (err < 0)
		return err;

	/* fill in kstatfs */
	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */

	/*
	 * express utilization in terms of large blocks to avoid
	 * overflow on 32-bit machines.
	 *
	 * NOTE: for the time being, we make bsize == frsize to humor
	 * not-yet-ancient versions of glibc that are broken.
	 * Someday, we will probably want to report a real block
	 * size...  whatever that may mean for a network file system!
	 */
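	/*
	 * Worked example (illustrative, assuming CEPH_BLOCK_SHIFT is 22,
	 * i.e. 4 MB blocks): st.kb counts KiB, so shifting right by
	 * CEPH_BLOCK_SHIFT - 10 = 12 turns KiB into 4 MB block counts.
	 * A pool reporting kb = 1 << 30 (1 TiB) thus yields
	 * f_blocks = 1 << 18, which fits comfortably in 32 bits.
	 */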
	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);

	buf->f_files = le64_to_cpu(st.num_objects);
	buf->f_ffree = -1;
	buf->f_namelen = NAME_MAX;

	/* Must convert the fsid, for consistent values across arches */
	fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^
	       le64_to_cpu(*((__le64 *)&monmap->fsid + 1));
	buf->f_fsid.val[0] = fsid & 0xffffffff;
	buf->f_fsid.val[1] = fsid >> 32;

	return 0;
}


static int ceph_sync_fs(struct super_block *sb, int wait)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

	if (!wait) {
		dout("sync_fs (non-blocking)\n");
		ceph_flush_dirty_caps(fsc->mdsc);
		dout("sync_fs (non-blocking) done\n");
		return 0;
	}

	dout("sync_fs (blocking)\n");
	ceph_osdc_sync(&fsc->client->osdc);
	ceph_mdsc_sync(fsc->mdsc);
	dout("sync_fs (blocking) done\n");
	return 0;
}

/*
 * mount options
 */
enum {
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,
	Opt_last_int,
	/* int args above */
	Opt_snapdirname,
	Opt_mds_namespace,
	Opt_fscache_uniq,
	Opt_last_string,
	/* string args above */
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_asyncreaddir,
	Opt_noasyncreaddir,
	Opt_dcache,
	Opt_nodcache,
	Opt_ino32,
	Opt_noino32,
	Opt_fscache,
	Opt_nofscache,
	Opt_poolperm,
	Opt_nopoolperm,
	Opt_require_active_mds,
	Opt_norequire_active_mds,
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	Opt_acl,
#endif
	Opt_noacl,
};

static match_table_t fsopt_tokens = {
	{Opt_wsize, "wsize=%d"},
	{Opt_rsize, "rsize=%d"},
	{Opt_rasize, "rasize=%d"},
	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
	{Opt_congestion_kb, "write_congestion_kb=%d"},
	/* int args above */
	{Opt_snapdirname, "snapdirname=%s"},
	{Opt_mds_namespace, "mds_namespace=%s"},
	{Opt_fscache_uniq, "fsc=%s"},
	/* string args above */
	{Opt_dirstat, "dirstat"},
	{Opt_nodirstat, "nodirstat"},
	{Opt_rbytes, "rbytes"},
	{Opt_norbytes, "norbytes"},
	{Opt_asyncreaddir, "asyncreaddir"},
	{Opt_noasyncreaddir, "noasyncreaddir"},
	{Opt_dcache, "dcache"},
	{Opt_nodcache, "nodcache"},
	{Opt_ino32, "ino32"},
	{Opt_noino32, "noino32"},
	{Opt_fscache, "fsc"},
	{Opt_nofscache, "nofsc"},
	{Opt_poolperm, "poolperm"},
	{Opt_nopoolperm, "nopoolperm"},
	{Opt_require_active_mds, "require_active_mds"},
	{Opt_norequire_active_mds, "norequire_active_mds"},
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	{Opt_acl, "acl"},
#endif
	{Opt_noacl, "noacl"},
	{-1, NULL}
};
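
/*
 * Illustrative example: an option string such as
 * "rsize=16777216,noasyncreaddir,fsc=myhost" matches the Opt_rsize,
 * Opt_noasyncreaddir and Opt_fscache_uniq tokens above; a string that
 * matches none of the patterns falls through to the {-1, NULL}
 * terminator and is rejected by parse_fsopt_token() with -EINVAL.
 */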
206 pr_err("bad mount option arg (not int) " 207 "at '%s'\n", c); 208 return ret; 209 } 210 dout("got int token %d val %d\n", token, intval); 211 } else if (token > Opt_last_int && token < Opt_last_string) { 212 dout("got string token %d val %s\n", token, 213 argstr[0].from); 214 } else { 215 dout("got token %d\n", token); 216 } 217 218 switch (token) { 219 case Opt_snapdirname: 220 kfree(fsopt->snapdir_name); 221 fsopt->snapdir_name = kstrndup(argstr[0].from, 222 argstr[0].to-argstr[0].from, 223 GFP_KERNEL); 224 if (!fsopt->snapdir_name) 225 return -ENOMEM; 226 break; 227 case Opt_mds_namespace: 228 fsopt->mds_namespace = kstrndup(argstr[0].from, 229 argstr[0].to-argstr[0].from, 230 GFP_KERNEL); 231 if (!fsopt->mds_namespace) 232 return -ENOMEM; 233 break; 234 case Opt_fscache_uniq: 235 fsopt->fscache_uniq = kstrndup(argstr[0].from, 236 argstr[0].to-argstr[0].from, 237 GFP_KERNEL); 238 if (!fsopt->fscache_uniq) 239 return -ENOMEM; 240 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 241 break; 242 /* misc */ 243 case Opt_wsize: 244 if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 245 return -EINVAL; 246 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 247 break; 248 case Opt_rsize: 249 if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 250 return -EINVAL; 251 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 252 break; 253 case Opt_rasize: 254 if (intval < 0) 255 return -EINVAL; 256 fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE); 257 break; 258 case Opt_caps_wanted_delay_min: 259 if (intval < 1) 260 return -EINVAL; 261 fsopt->caps_wanted_delay_min = intval; 262 break; 263 case Opt_caps_wanted_delay_max: 264 if (intval < 1) 265 return -EINVAL; 266 fsopt->caps_wanted_delay_max = intval; 267 break; 268 case Opt_readdir_max_entries: 269 if (intval < 1) 270 return -EINVAL; 271 fsopt->max_readdir = intval; 272 break; 273 case Opt_readdir_max_bytes: 274 if (intval < PAGE_SIZE && intval != 0) 275 return -EINVAL; 276 fsopt->max_readdir_bytes = intval; 277 break; 278 case Opt_congestion_kb: 279 if (intval < 1024) /* at least 1M */ 280 return -EINVAL; 281 fsopt->congestion_kb = intval; 282 break; 283 case Opt_dirstat: 284 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 285 break; 286 case Opt_nodirstat: 287 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 288 break; 289 case Opt_rbytes: 290 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 291 break; 292 case Opt_norbytes: 293 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 294 break; 295 case Opt_asyncreaddir: 296 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 297 break; 298 case Opt_noasyncreaddir: 299 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 300 break; 301 case Opt_dcache: 302 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 303 break; 304 case Opt_nodcache: 305 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 306 break; 307 case Opt_ino32: 308 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 309 break; 310 case Opt_noino32: 311 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 312 break; 313 case Opt_fscache: 314 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 315 break; 316 case Opt_nofscache: 317 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 318 break; 319 case Opt_poolperm: 320 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 321 printk ("pool perm"); 322 break; 323 case Opt_nopoolperm: 324 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 325 break; 326 case Opt_require_active_mds: 327 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 328 break; 329 case Opt_norequire_active_mds: 330 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 331 break; 332 #ifdef CONFIG_CEPH_FS_POSIX_ACL 333 case Opt_acl: 334 fsopt->sb_flags |= SB_POSIXACL; 335 break; 336 #endif 
	case Opt_noacl:
		fsopt->sb_flags &= ~SB_POSIXACL;
		break;
	default:
		BUG_ON(token);
	}
	return 0;
}

static void destroy_mount_options(struct ceph_mount_options *args)
{
	dout("destroy_mount_options %p\n", args);
	kfree(args->snapdir_name);
	kfree(args->mds_namespace);
	kfree(args->server_path);
	kfree(args->fscache_uniq);
	kfree(args);
}

static int strcmp_null(const char *s1, const char *s2)
{
	if (!s1 && !s2)
		return 0;
	if (s1 && !s2)
		return -1;
	if (!s1 && s2)
		return 1;
	return strcmp(s1, s2);
}

static int compare_mount_options(struct ceph_mount_options *new_fsopt,
				 struct ceph_options *new_opt,
				 struct ceph_fs_client *fsc)
{
	struct ceph_mount_options *fsopt1 = new_fsopt;
	struct ceph_mount_options *fsopt2 = fsc->mount_options;
	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
	int ret;

	ret = memcmp(fsopt1, fsopt2, ofs);
	if (ret)
		return ret;

	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
	if (ret)
		return ret;
	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
	if (ret)
		return ret;
	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
	if (ret)
		return ret;
	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
	if (ret)
		return ret;

	return ceph_compare_options(new_opt, fsc->client);
}

static int parse_mount_options(struct ceph_mount_options **pfsopt,
			       struct ceph_options **popt,
			       int flags, char *options,
			       const char *dev_name)
{
	struct ceph_mount_options *fsopt;
	const char *dev_name_end;
	int err;

	if (!dev_name || !*dev_name)
		return -EINVAL;

	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
	if (!fsopt)
		return -ENOMEM;

	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);

	fsopt->sb_flags = flags;
	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;

	fsopt->wsize = CEPH_MAX_WRITE_SIZE;
	fsopt->rsize = CEPH_MAX_READ_SIZE;
	fsopt->rasize = CEPH_RASIZE_DEFAULT;
	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
	if (!fsopt->snapdir_name) {
		err = -ENOMEM;
		goto out;
	}

	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
	fsopt->congestion_kb = default_congestion_kb();

	/*
	 * Distinguish the server list from the path in "dev_name".
	 * Internally we do not include the leading '/' in the path.
	 *
	 * "dev_name" will look like:
	 *   <server_spec>[,<server_spec>...]:[<path>]
	 * where
	 *   <server_spec> is <ip>[:<port>]
	 *   <path> is optional, but if present must begin with '/'
	 */
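	/*
	 * Illustrative example: for dev_name
	 * "192.168.0.1:6789,192.168.0.2:6789:/export/dir", everything up
	 * to the ':' before the first '/' is handed to ceph_parse_options()
	 * as the monitor list, and server_path becomes "/export/dir"
	 * (the leading '/' is skipped again at mount time).
	 */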
	dev_name_end = strchr(dev_name, '/');
	if (dev_name_end) {
		if (strlen(dev_name_end) > 1) {
			fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
			if (!fsopt->server_path) {
				err = -ENOMEM;
				goto out;
			}
		}
	} else {
		dev_name_end = dev_name + strlen(dev_name);
	}
	err = -EINVAL;
	dev_name_end--;		/* back up to ':' separator */
	if (dev_name_end < dev_name || *dev_name_end != ':') {
		pr_err("device name is missing path (no : separator in %s)\n",
		       dev_name);
		goto out;
	}
	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
	if (fsopt->server_path)
		dout("server path '%s'\n", fsopt->server_path);

	*popt = ceph_parse_options(options, dev_name, dev_name_end,
				   parse_fsopt_token, (void *)fsopt);
	if (IS_ERR(*popt)) {
		err = PTR_ERR(*popt);
		goto out;
	}

	/* success */
	*pfsopt = fsopt;
	return 0;

out:
	destroy_mount_options(fsopt);
	return err;
}

/**
 * ceph_show_options - Show mount options in /proc/mounts
 * @m: seq_file to write to
 * @root: root of that (sub)tree
 */
static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
	struct ceph_mount_options *fsopt = fsc->mount_options;
	size_t pos;
	int ret;

	/* a comma between MNT/MS and client options */
	seq_putc(m, ',');
	pos = m->count;

	ret = ceph_print_client_options(m, fsc->client);
	if (ret)
		return ret;

	/* retract our comma if no client options */
	if (m->count == pos)
		m->count--;

	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
		seq_puts(m, ",dirstat");
	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
		seq_puts(m, ",rbytes");
	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
		seq_puts(m, ",noasyncreaddir");
	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
		seq_puts(m, ",nodcache");
	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
		if (fsopt->fscache_uniq)
			seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
		else
			seq_puts(m, ",fsc");
	}
	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
		seq_puts(m, ",nopoolperm");

#ifdef CONFIG_CEPH_FS_POSIX_ACL
	if (fsopt->sb_flags & SB_POSIXACL)
		seq_puts(m, ",acl");
	else
		seq_puts(m, ",noacl");
#endif

	if (fsopt->mds_namespace)
		seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
	if (fsopt->wsize)
		seq_printf(m, ",wsize=%d", fsopt->wsize);
	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
		seq_printf(m, ",rsize=%d", fsopt->rsize);
	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
		seq_printf(m, ",rasize=%d", fsopt->rasize);
	if (fsopt->congestion_kb != default_congestion_kb())
		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_min=%d",
			   fsopt->caps_wanted_delay_min);
	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
		seq_printf(m, ",caps_wanted_delay_max=%d",
			   fsopt->caps_wanted_delay_max);
	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
		seq_show_option(m, "snapdirname", fsopt->snapdir_name);

	return 0;
}
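
/*
 * Illustrative example: a superblock mounted with
 * "-o nodcache,fsc,rasize=16777216" would have
 * ",nodcache,fsc,rasize=16777216" appended here (assuming 16 MB is not
 * CEPH_RASIZE_DEFAULT), following whatever generic client options
 * ceph_print_client_options() already emitted.
 */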

/*
 * handle any mon messages the standard library doesn't understand.
 * return error if we don't either.
 */
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
	struct ceph_fs_client *fsc = client->private;
	int type = le16_to_cpu(msg->hdr.type);

	switch (type) {
	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
		return 0;
	case CEPH_MSG_FS_MAP_USER:
		ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
		return 0;
	default:
		return -1;
	}
}

/*
 * create a new fs client
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
					       struct ceph_options *opt)
{
	struct ceph_fs_client *fsc;
	int page_count;
	size_t size;
	int err = -ENOMEM;

	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
	if (!fsc)
		return ERR_PTR(-ENOMEM);

	fsc->client = ceph_create_client(opt, fsc);
	if (IS_ERR(fsc->client)) {
		err = PTR_ERR(fsc->client);
		goto fail;
	}
	fsc->client->extra_mon_dispatch = extra_mon_dispatch;

	if (!fsopt->mds_namespace) {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				   0, true);
	} else {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				   0, false);
	}

	fsc->mount_options = fsopt;

	fsc->sb = NULL;
	fsc->mount_state = CEPH_MOUNT_MOUNTING;

	atomic_long_set(&fsc->writeback_count, 0);

	err = -ENOMEM;
	/*
	 * The number of concurrent works can be high but they don't need
	 * to be processed in parallel, limit concurrency.
	 */
	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
	if (!fsc->wb_wq)
		goto fail_client;
	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
	if (!fsc->pg_inv_wq)
		goto fail_wb_wq;
	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
	if (!fsc->trunc_wq)
		goto fail_pg_inv_wq;
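
	/*
	 * Sizing note (illustrative): the pool below keeps 10 preallocated
	 * page-pointer arrays, each large enough for one wsize worth of
	 * pages.  For example, a 16 MB wsize with 4 KiB pages means
	 * page_count = 4096 and 32 KiB per array on a 64-bit kernel.
	 */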
	/* set up mempools */
	err = -ENOMEM;
	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
	size = sizeof (struct page *) * (page_count ? page_count : 1);
	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
	if (!fsc->wb_pagevec_pool)
		goto fail_trunc_wq;

	/* caps */
	fsc->min_caps = fsopt->max_readdir;

	return fsc;

fail_trunc_wq:
	destroy_workqueue(fsc->trunc_wq);
fail_pg_inv_wq:
	destroy_workqueue(fsc->pg_inv_wq);
fail_wb_wq:
	destroy_workqueue(fsc->wb_wq);
fail_client:
	ceph_destroy_client(fsc->client);
fail:
	kfree(fsc);
	return ERR_PTR(err);
}
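
/*
 * Tear down everything create_fs_client() set up (workqueues, mempool,
 * mount options, and the ceph client).  Callers in this file destroy
 * the MDS client via ceph_mdsc_destroy() before getting here.
 */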
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
	dout("destroy_fs_client %p\n", fsc);

	destroy_workqueue(fsc->wb_wq);
	destroy_workqueue(fsc->pg_inv_wq);
	destroy_workqueue(fsc->trunc_wq);

	mempool_destroy(fsc->wb_pagevec_pool);

	destroy_mount_options(fsc->mount_options);

	ceph_destroy_client(fsc->client);

	kfree(fsc);
	dout("destroy_fs_client %p done\n", fsc);
}

/*
 * caches
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;

static void ceph_inode_init_once(void *foo)
{
	struct ceph_inode_info *ci = foo;
	inode_init_once(&ci->vfs_inode);
}

static int __init init_caches(void)
{
	int error = -ENOMEM;

	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
				      SLAB_ACCOUNT, ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto bad_cap;
	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
					   SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_flush_cachep)
		goto bad_cap_flush;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto bad_file;

	if ((error = ceph_fscache_register()))
		goto bad_fscache;

	return 0;
bad_fscache:
	kmem_cache_destroy(ceph_file_cachep);
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return error;
}

static void destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(ceph_inode_cachep);
	kmem_cache_destroy(ceph_cap_cachep);
	kmem_cache_destroy(ceph_cap_flush_cachep);
	kmem_cache_destroy(ceph_dentry_cachep);
	kmem_cache_destroy(ceph_file_cachep);

	ceph_fscache_unregister();
}


/*
 * ceph_umount_begin - initiate forced umount.  Tear down the
 * mount, skipping steps that may hang while waiting for server(s).
 */
static void ceph_umount_begin(struct super_block *sb)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

	dout("ceph_umount_begin - starting forced umount\n");
	if (!fsc)
		return;
	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
	ceph_mdsc_force_umount(fsc->mdsc);
	return;
}

static const struct super_operations ceph_super_ops = {
	.alloc_inode	= ceph_alloc_inode,
	.destroy_inode	= ceph_destroy_inode,
	.write_inode	= ceph_write_inode,
	.drop_inode	= ceph_drop_inode,
	.sync_fs	= ceph_sync_fs,
	.put_super	= ceph_put_super,
	.show_options	= ceph_show_options,
	.statfs		= ceph_statfs,
	.umount_begin	= ceph_umount_begin,
};

/*
 * Bootstrap mount by opening the root directory.  Note the mount
 * @started time from caller, and time out if this takes too long.
 */
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
				       const char *path,
				       unsigned long started)
{
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req = NULL;
	int err;
	struct dentry *root;

	/* open dir */
	dout("open_root_inode opening '%s'\n", path);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);
	req->r_path1 = kstrdup(path, GFP_NOFS);
	if (!req->r_path1) {
		root = ERR_PTR(-ENOMEM);
		goto out;
	}

	req->r_ino1.ino = CEPH_INO_ROOT;
	req->r_ino1.snap = CEPH_NOSNAP;
	req->r_started = started;
	req->r_timeout = fsc->client->options->mount_timeout;
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
	req->r_num_caps = 2;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	if (err == 0) {
		struct inode *inode = req->r_target_inode;
		req->r_target_inode = NULL;
		dout("open_root_inode success\n");
		root = d_make_root(inode);
		if (!root) {
			root = ERR_PTR(-ENOMEM);
			goto out;
		}
		dout("open_root_inode success, root dentry is %p\n", root);
	} else {
		root = ERR_PTR(err);
	}
out:
	ceph_mdsc_put_request(req);
	return root;
}

/*
 * mount: join the ceph cluster, and open root directory.
 */
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
{
	int err;
	unsigned long started = jiffies;  /* note the start time */
	struct dentry *root;
	int first = 0;   /* first vfsmount for this super_block */

	dout("mount start %p\n", fsc);
	mutex_lock(&fsc->client->mount_mutex);

	if (!fsc->sb->s_root) {
		const char *path;
		err = __ceph_open_session(fsc->client, started);
		if (err < 0)
			goto out;

		/* setup fscache */
		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
			err = ceph_fscache_register_fs(fsc);
			if (err < 0)
				goto out;
		}

		if (!fsc->mount_options->server_path) {
			path = "";
			dout("mount opening path \\t\n");
		} else {
			path = fsc->mount_options->server_path + 1;
			dout("mount opening path %s\n", path);
		}
		root = open_root_dentry(fsc, path, started);
		if (IS_ERR(root)) {
			err = PTR_ERR(root);
			goto out;
		}
		fsc->sb->s_root = dget(root);
		first = 1;

		err = ceph_fs_debugfs_init(fsc);
		if (err < 0)
			goto fail;
	} else {
		root = dget(fsc->sb->s_root);
	}

	fsc->mount_state = CEPH_MOUNT_MOUNTED;
	dout("mount success\n");
	mutex_unlock(&fsc->client->mount_mutex);
	return root;

fail:
	if (first) {
		dput(fsc->sb->s_root);
		fsc->sb->s_root = NULL;
	}
out:
	mutex_unlock(&fsc->client->mount_mutex);
	return ERR_PTR(err);
}

static int ceph_set_super(struct super_block *s, void *data)
{
	struct ceph_fs_client *fsc = data;
	int ret;

	dout("set_super %p data %p\n", s, data);

	s->s_flags = fsc->mount_options->sb_flags;
	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */

	s->s_xattr = ceph_xattr_handlers;
	s->s_fs_info = fsc;
	fsc->sb = s;

	s->s_op = &ceph_super_ops;
	s->s_d_op = &ceph_dentry_ops;
	s->s_export_op = &ceph_export_ops;

	s->s_time_gran = 1000;  /* 1000 ns == 1 us */

	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
	if (ret != 0)
		goto fail;

	return ret;

fail:
	s->s_fs_info = NULL;
	fsc->sb = NULL;
	return ret;
}

/*
 * share superblock if same fs AND options
 */
static int ceph_compare_super(struct super_block *sb, void *data)
{
	struct ceph_fs_client *new = data;
	struct ceph_mount_options *fsopt = new->mount_options;
	struct ceph_options *opt = new->client->options;
	struct ceph_fs_client *other = ceph_sb_to_client(sb);

	dout("ceph_compare_super %p\n", sb);

	if (compare_mount_options(fsopt, opt, other)) {
		dout("monitor(s)/mount options don't match\n");
		return 0;
	}
	if ((opt->flags & CEPH_OPT_FSID) &&
	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
		dout("fsid doesn't match\n");
		return 0;
	}
	if (fsopt->sb_flags != other->mount_options->sb_flags) {
		dout("flags differ\n");
		return 0;
	}
	return 1;
}

/*
 * construct our own bdi so we can control readahead, etc.
 */
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
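
/*
 * Illustrative example: an 8 MB rasize with 4 KiB pages gives
 * ra_pages = 2048 below; io_pages is derived from rsize the same way.
 */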
static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
{
	int err;

	err = super_setup_bdi_name(sb, "ceph-%ld",
				   atomic_long_inc_return(&bdi_seq));
	if (err)
		return err;

	/* set ra_pages based on rasize mount option? */
	sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;

	/* set io_pages based on max osd read size */
	sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;

	return 0;
}

static struct dentry *ceph_mount(struct file_system_type *fs_type,
				 int flags, const char *dev_name, void *data)
{
	struct super_block *sb;
	struct ceph_fs_client *fsc;
	struct dentry *res;
	int err;
	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
	struct ceph_mount_options *fsopt = NULL;
	struct ceph_options *opt = NULL;

	dout("ceph_mount\n");

#ifdef CONFIG_CEPH_FS_POSIX_ACL
	flags |= SB_POSIXACL;
#endif
	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name);
	if (err < 0) {
		res = ERR_PTR(err);
		goto out_final;
	}

	/* create client (which we may/may not use) */
	fsc = create_fs_client(fsopt, opt);
	if (IS_ERR(fsc)) {
		res = ERR_CAST(fsc);
		destroy_mount_options(fsopt);
		ceph_destroy_options(opt);
		goto out_final;
	}

	err = ceph_mdsc_init(fsc);
	if (err < 0) {
		res = ERR_PTR(err);
		goto out;
	}

	if (ceph_test_opt(fsc->client, NOSHARE))
		compare_super = NULL;
	sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
	if (IS_ERR(sb)) {
		res = ERR_CAST(sb);
		goto out;
	}

	if (ceph_sb_to_client(sb) != fsc) {
		ceph_mdsc_destroy(fsc);
		destroy_fs_client(fsc);
		fsc = ceph_sb_to_client(sb);
		dout("get_sb got existing client %p\n", fsc);
	} else {
		dout("get_sb using new client %p\n", fsc);
		err = ceph_setup_bdi(sb, fsc);
		if (err < 0) {
			res = ERR_PTR(err);
			goto out_splat;
		}
	}

	res = ceph_real_mount(fsc);
	if (IS_ERR(res))
		goto out_splat;
	dout("root %p inode %p ino %llx.%llx\n", res,
	     d_inode(res), ceph_vinop(d_inode(res)));
	return res;

out_splat:
	ceph_mdsc_close_sessions(fsc->mdsc);
	deactivate_locked_super(sb);
	goto out_final;

out:
	ceph_mdsc_destroy(fsc);
	destroy_fs_client(fsc);
out_final:
	dout("ceph_mount fail %ld\n", PTR_ERR(res));
	return res;
}

static void ceph_kill_sb(struct super_block *s)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
	dev_t dev = s->s_dev;

	dout("kill_sb %p\n", s);

	ceph_mdsc_pre_umount(fsc->mdsc);
	generic_shutdown_super(s);

	fsc->client->extra_mon_dispatch = NULL;
	ceph_fs_debugfs_cleanup(fsc);

	ceph_fscache_unregister_fs(fsc);

	ceph_mdsc_destroy(fsc);

	destroy_fs_client(fsc);
	free_anon_bdev(dev);
}

static struct file_system_type ceph_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ceph",
	.mount		= ceph_mount,
	.kill_sb	= ceph_kill_sb,
	.fs_flags	= FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("ceph");
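
/*
 * Illustrative usage from userspace (option names assume the standard
 * libceph/mount.ceph option set; auth details depend on the cluster):
 *
 *   mount -t ceph 192.168.0.1:6789:/ /mnt/ceph \
 *         -o name=admin,secret=<base64 key>,rasize=16777216
 */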

static int __init init_ceph(void)
{
	int ret = init_caches();
	if (ret)
		goto out;

	ceph_flock_init();
	ceph_xattr_init();
	ret = register_filesystem(&ceph_fs_type);
	if (ret)
		goto out_xattr;

	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);

	return 0;

out_xattr:
	ceph_xattr_exit();
	destroy_caches();
out:
	return ret;
}

static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");
	unregister_filesystem(&ceph_fs_type);
	ceph_xattr_exit();
	destroy_caches();
}

module_init(init_ceph);
module_exit(exit_ceph);

MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");