1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 #include "cache.h" 21 22 #include <linux/ceph/ceph_features.h> 23 #include <linux/ceph/decode.h> 24 #include <linux/ceph/mon_client.h> 25 #include <linux/ceph/auth.h> 26 #include <linux/ceph/debugfs.h> 27 28 /* 29 * Ceph superblock operations 30 * 31 * Handle the basics of mounting, unmounting. 32 */ 33 34 /* 35 * super ops 36 */ 37 static void ceph_put_super(struct super_block *s) 38 { 39 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 40 41 dout("put_super\n"); 42 ceph_mdsc_close_sessions(fsc->mdsc); 43 } 44 45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 46 { 47 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 48 struct ceph_monmap *monmap = fsc->client->monc.monmap; 49 struct ceph_statfs st; 50 u64 fsid; 51 int err; 52 u64 data_pool; 53 54 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 55 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 56 } else { 57 data_pool = CEPH_NOPOOL; 58 } 59 60 dout("statfs\n"); 61 err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st); 62 if (err < 0) 63 return err; 64 65 /* fill in kstatfs */ 66 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 67 68 /* 69 * express utilization in terms of large blocks to avoid 70 * overflow on 32-bit machines. 71 * 72 * NOTE: for the time being, we make bsize == frsize to humor 73 * not-yet-ancient versions of glibc that are broken. 74 * Someday, we will probably want to report a real block 75 * size... whatever that may mean for a network file system! 76 */ 77 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 78 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 79 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 80 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 81 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 82 83 buf->f_files = le64_to_cpu(st.num_objects); 84 buf->f_ffree = -1; 85 buf->f_namelen = NAME_MAX; 86 87 /* Must convert the fsid, for consistent values across arches */ 88 fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^ 89 le64_to_cpu(*((__le64 *)&monmap->fsid + 1)); 90 buf->f_fsid.val[0] = fsid & 0xffffffff; 91 buf->f_fsid.val[1] = fsid >> 32; 92 93 return 0; 94 } 95 96 97 static int ceph_sync_fs(struct super_block *sb, int wait) 98 { 99 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 100 101 if (!wait) { 102 dout("sync_fs (non-blocking)\n"); 103 ceph_flush_dirty_caps(fsc->mdsc); 104 dout("sync_fs (non-blocking) done\n"); 105 return 0; 106 } 107 108 dout("sync_fs (blocking)\n"); 109 ceph_osdc_sync(&fsc->client->osdc); 110 ceph_mdsc_sync(fsc->mdsc); 111 dout("sync_fs (blocking) done\n"); 112 return 0; 113 } 114 115 /* 116 * mount options 117 */ 118 enum { 119 Opt_wsize, 120 Opt_rsize, 121 Opt_rasize, 122 Opt_caps_wanted_delay_min, 123 Opt_caps_wanted_delay_max, 124 Opt_readdir_max_entries, 125 Opt_readdir_max_bytes, 126 Opt_congestion_kb, 127 Opt_last_int, 128 /* int args above */ 129 Opt_snapdirname, 130 Opt_mds_namespace, 131 Opt_fscache_uniq, 132 Opt_last_string, 133 /* string args above */ 134 Opt_dirstat, 135 Opt_nodirstat, 136 Opt_rbytes, 137 Opt_norbytes, 138 Opt_asyncreaddir, 139 Opt_noasyncreaddir, 140 Opt_dcache, 141 Opt_nodcache, 142 Opt_ino32, 143 Opt_noino32, 144 Opt_fscache, 145 Opt_nofscache, 146 Opt_poolperm, 147 Opt_nopoolperm, 148 Opt_require_active_mds, 149 Opt_norequire_active_mds, 150 #ifdef CONFIG_CEPH_FS_POSIX_ACL 151 Opt_acl, 152 #endif 153 Opt_noacl, 154 }; 155 156 static match_table_t fsopt_tokens = { 157 {Opt_wsize, "wsize=%d"}, 158 {Opt_rsize, "rsize=%d"}, 159 {Opt_rasize, "rasize=%d"}, 160 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 161 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 162 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 163 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 164 {Opt_congestion_kb, "write_congestion_kb=%d"}, 165 /* int args above */ 166 {Opt_snapdirname, "snapdirname=%s"}, 167 {Opt_mds_namespace, "mds_namespace=%s"}, 168 {Opt_fscache_uniq, "fsc=%s"}, 169 /* string args above */ 170 {Opt_dirstat, "dirstat"}, 171 {Opt_nodirstat, "nodirstat"}, 172 {Opt_rbytes, "rbytes"}, 173 {Opt_norbytes, "norbytes"}, 174 {Opt_asyncreaddir, "asyncreaddir"}, 175 {Opt_noasyncreaddir, "noasyncreaddir"}, 176 {Opt_dcache, "dcache"}, 177 {Opt_nodcache, "nodcache"}, 178 {Opt_ino32, "ino32"}, 179 {Opt_noino32, "noino32"}, 180 {Opt_fscache, "fsc"}, 181 {Opt_nofscache, "nofsc"}, 182 {Opt_poolperm, "poolperm"}, 183 {Opt_nopoolperm, "nopoolperm"}, 184 {Opt_require_active_mds, "require_active_mds"}, 185 {Opt_norequire_active_mds, "norequire_active_mds"}, 186 #ifdef CONFIG_CEPH_FS_POSIX_ACL 187 {Opt_acl, "acl"}, 188 #endif 189 {Opt_noacl, "noacl"}, 190 {-1, NULL} 191 }; 192 193 static int parse_fsopt_token(char *c, void *private) 194 { 195 struct ceph_mount_options *fsopt = private; 196 substring_t argstr[MAX_OPT_ARGS]; 197 int token, intval, ret; 198 199 token = match_token((char *)c, fsopt_tokens, argstr); 200 if (token < 0) 201 return -EINVAL; 202 203 if (token < Opt_last_int) { 204 ret = match_int(&argstr[0], &intval); 205 if (ret < 0) { 206 pr_err("bad mount option arg (not int) " 207 "at '%s'\n", c); 208 return ret; 209 } 210 dout("got int token %d val %d\n", token, intval); 211 } else if (token > Opt_last_int && token < Opt_last_string) { 212 dout("got string token %d val %s\n", token, 213 argstr[0].from); 214 } else { 215 dout("got token %d\n", token); 216 } 217 218 switch (token) { 219 case Opt_snapdirname: 220 kfree(fsopt->snapdir_name); 221 fsopt->snapdir_name = kstrndup(argstr[0].from, 222 argstr[0].to-argstr[0].from, 223 GFP_KERNEL); 224 if (!fsopt->snapdir_name) 225 return -ENOMEM; 226 break; 227 case Opt_mds_namespace: 228 kfree(fsopt->mds_namespace); 229 fsopt->mds_namespace = kstrndup(argstr[0].from, 230 argstr[0].to-argstr[0].from, 231 GFP_KERNEL); 232 if (!fsopt->mds_namespace) 233 return -ENOMEM; 234 break; 235 case Opt_fscache_uniq: 236 kfree(fsopt->fscache_uniq); 237 fsopt->fscache_uniq = kstrndup(argstr[0].from, 238 argstr[0].to-argstr[0].from, 239 GFP_KERNEL); 240 if (!fsopt->fscache_uniq) 241 return -ENOMEM; 242 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 243 break; 244 /* misc */ 245 case Opt_wsize: 246 if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 247 return -EINVAL; 248 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 249 break; 250 case Opt_rsize: 251 if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 252 return -EINVAL; 253 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 254 break; 255 case Opt_rasize: 256 if (intval < 0) 257 return -EINVAL; 258 fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE); 259 break; 260 case Opt_caps_wanted_delay_min: 261 if (intval < 1) 262 return -EINVAL; 263 fsopt->caps_wanted_delay_min = intval; 264 break; 265 case Opt_caps_wanted_delay_max: 266 if (intval < 1) 267 return -EINVAL; 268 fsopt->caps_wanted_delay_max = intval; 269 break; 270 case Opt_readdir_max_entries: 271 if (intval < 1) 272 return -EINVAL; 273 fsopt->max_readdir = intval; 274 break; 275 case Opt_readdir_max_bytes: 276 if (intval < PAGE_SIZE && intval != 0) 277 return -EINVAL; 278 fsopt->max_readdir_bytes = intval; 279 break; 280 case Opt_congestion_kb: 281 if (intval < 1024) /* at least 1M */ 282 return -EINVAL; 283 fsopt->congestion_kb = intval; 284 break; 285 case Opt_dirstat: 286 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 287 break; 288 case Opt_nodirstat: 289 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 290 break; 291 case Opt_rbytes: 292 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 293 break; 294 case Opt_norbytes: 295 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 296 break; 297 case Opt_asyncreaddir: 298 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 299 break; 300 case Opt_noasyncreaddir: 301 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 302 break; 303 case Opt_dcache: 304 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 305 break; 306 case Opt_nodcache: 307 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 308 break; 309 case Opt_ino32: 310 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 311 break; 312 case Opt_noino32: 313 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 314 break; 315 case Opt_fscache: 316 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 317 break; 318 case Opt_nofscache: 319 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 320 break; 321 case Opt_poolperm: 322 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 323 printk ("pool perm"); 324 break; 325 case Opt_nopoolperm: 326 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 327 break; 328 case Opt_require_active_mds: 329 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 330 break; 331 case Opt_norequire_active_mds: 332 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 333 break; 334 #ifdef CONFIG_CEPH_FS_POSIX_ACL 335 case Opt_acl: 336 fsopt->sb_flags |= SB_POSIXACL; 337 break; 338 #endif 339 case Opt_noacl: 340 fsopt->sb_flags &= ~SB_POSIXACL; 341 break; 342 default: 343 BUG_ON(token); 344 } 345 return 0; 346 } 347 348 static void destroy_mount_options(struct ceph_mount_options *args) 349 { 350 dout("destroy_mount_options %p\n", args); 351 kfree(args->snapdir_name); 352 kfree(args->mds_namespace); 353 kfree(args->server_path); 354 kfree(args->fscache_uniq); 355 kfree(args); 356 } 357 358 static int strcmp_null(const char *s1, const char *s2) 359 { 360 if (!s1 && !s2) 361 return 0; 362 if (s1 && !s2) 363 return -1; 364 if (!s1 && s2) 365 return 1; 366 return strcmp(s1, s2); 367 } 368 369 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 370 struct ceph_options *new_opt, 371 struct ceph_fs_client *fsc) 372 { 373 struct ceph_mount_options *fsopt1 = new_fsopt; 374 struct ceph_mount_options *fsopt2 = fsc->mount_options; 375 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 376 int ret; 377 378 ret = memcmp(fsopt1, fsopt2, ofs); 379 if (ret) 380 return ret; 381 382 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 383 if (ret) 384 return ret; 385 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 386 if (ret) 387 return ret; 388 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 389 if (ret) 390 return ret; 391 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 392 if (ret) 393 return ret; 394 395 return ceph_compare_options(new_opt, fsc->client); 396 } 397 398 static int parse_mount_options(struct ceph_mount_options **pfsopt, 399 struct ceph_options **popt, 400 int flags, char *options, 401 const char *dev_name) 402 { 403 struct ceph_mount_options *fsopt; 404 const char *dev_name_end; 405 int err; 406 407 if (!dev_name || !*dev_name) 408 return -EINVAL; 409 410 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 411 if (!fsopt) 412 return -ENOMEM; 413 414 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 415 416 fsopt->sb_flags = flags; 417 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 418 419 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 420 fsopt->rsize = CEPH_MAX_READ_SIZE; 421 fsopt->rasize = CEPH_RASIZE_DEFAULT; 422 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 423 if (!fsopt->snapdir_name) { 424 err = -ENOMEM; 425 goto out; 426 } 427 428 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 429 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 430 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 431 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 432 fsopt->congestion_kb = default_congestion_kb(); 433 434 /* 435 * Distinguish the server list from the path in "dev_name". 436 * Internally we do not include the leading '/' in the path. 437 * 438 * "dev_name" will look like: 439 * <server_spec>[,<server_spec>...]:[<path>] 440 * where 441 * <server_spec> is <ip>[:<port>] 442 * <path> is optional, but if present must begin with '/' 443 */ 444 dev_name_end = strchr(dev_name, '/'); 445 if (dev_name_end) { 446 if (strlen(dev_name_end) > 1) { 447 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 448 if (!fsopt->server_path) { 449 err = -ENOMEM; 450 goto out; 451 } 452 } 453 } else { 454 dev_name_end = dev_name + strlen(dev_name); 455 } 456 err = -EINVAL; 457 dev_name_end--; /* back up to ':' separator */ 458 if (dev_name_end < dev_name || *dev_name_end != ':') { 459 pr_err("device name is missing path (no : separator in %s)\n", 460 dev_name); 461 goto out; 462 } 463 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 464 if (fsopt->server_path) 465 dout("server path '%s'\n", fsopt->server_path); 466 467 *popt = ceph_parse_options(options, dev_name, dev_name_end, 468 parse_fsopt_token, (void *)fsopt); 469 if (IS_ERR(*popt)) { 470 err = PTR_ERR(*popt); 471 goto out; 472 } 473 474 /* success */ 475 *pfsopt = fsopt; 476 return 0; 477 478 out: 479 destroy_mount_options(fsopt); 480 return err; 481 } 482 483 /** 484 * ceph_show_options - Show mount options in /proc/mounts 485 * @m: seq_file to write to 486 * @root: root of that (sub)tree 487 */ 488 static int ceph_show_options(struct seq_file *m, struct dentry *root) 489 { 490 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 491 struct ceph_mount_options *fsopt = fsc->mount_options; 492 size_t pos; 493 int ret; 494 495 /* a comma between MNT/MS and client options */ 496 seq_putc(m, ','); 497 pos = m->count; 498 499 ret = ceph_print_client_options(m, fsc->client); 500 if (ret) 501 return ret; 502 503 /* retract our comma if no client options */ 504 if (m->count == pos) 505 m->count--; 506 507 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 508 seq_puts(m, ",dirstat"); 509 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 510 seq_puts(m, ",rbytes"); 511 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 512 seq_puts(m, ",noasyncreaddir"); 513 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 514 seq_puts(m, ",nodcache"); 515 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 516 if (fsopt->fscache_uniq) 517 seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); 518 else 519 seq_puts(m, ",fsc"); 520 } 521 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 522 seq_puts(m, ",nopoolperm"); 523 524 #ifdef CONFIG_CEPH_FS_POSIX_ACL 525 if (fsopt->sb_flags & SB_POSIXACL) 526 seq_puts(m, ",acl"); 527 else 528 seq_puts(m, ",noacl"); 529 #endif 530 531 if (fsopt->mds_namespace) 532 seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace); 533 if (fsopt->wsize) 534 seq_printf(m, ",wsize=%d", fsopt->wsize); 535 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 536 seq_printf(m, ",rsize=%d", fsopt->rsize); 537 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 538 seq_printf(m, ",rasize=%d", fsopt->rasize); 539 if (fsopt->congestion_kb != default_congestion_kb()) 540 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 541 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 542 seq_printf(m, ",caps_wanted_delay_min=%d", 543 fsopt->caps_wanted_delay_min); 544 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 545 seq_printf(m, ",caps_wanted_delay_max=%d", 546 fsopt->caps_wanted_delay_max); 547 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 548 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 549 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 550 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 551 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 552 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 553 554 return 0; 555 } 556 557 /* 558 * handle any mon messages the standard library doesn't understand. 559 * return error if we don't either. 560 */ 561 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 562 { 563 struct ceph_fs_client *fsc = client->private; 564 int type = le16_to_cpu(msg->hdr.type); 565 566 switch (type) { 567 case CEPH_MSG_MDS_MAP: 568 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 569 return 0; 570 case CEPH_MSG_FS_MAP_USER: 571 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 572 return 0; 573 default: 574 return -1; 575 } 576 } 577 578 /* 579 * create a new fs client 580 */ 581 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 582 struct ceph_options *opt) 583 { 584 struct ceph_fs_client *fsc; 585 int page_count; 586 size_t size; 587 int err = -ENOMEM; 588 589 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 590 if (!fsc) 591 return ERR_PTR(-ENOMEM); 592 593 fsc->client = ceph_create_client(opt, fsc); 594 if (IS_ERR(fsc->client)) { 595 err = PTR_ERR(fsc->client); 596 goto fail; 597 } 598 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 599 600 if (!fsopt->mds_namespace) { 601 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 602 0, true); 603 } else { 604 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 605 0, false); 606 } 607 608 fsc->mount_options = fsopt; 609 610 fsc->sb = NULL; 611 fsc->mount_state = CEPH_MOUNT_MOUNTING; 612 613 atomic_long_set(&fsc->writeback_count, 0); 614 615 err = -ENOMEM; 616 /* 617 * The number of concurrent works can be high but they don't need 618 * to be processed in parallel, limit concurrency. 619 */ 620 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 621 if (!fsc->wb_wq) 622 goto fail_client; 623 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 624 if (!fsc->pg_inv_wq) 625 goto fail_wb_wq; 626 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 627 if (!fsc->trunc_wq) 628 goto fail_pg_inv_wq; 629 630 /* set up mempools */ 631 err = -ENOMEM; 632 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 633 size = sizeof (struct page *) * (page_count ? page_count : 1); 634 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 635 if (!fsc->wb_pagevec_pool) 636 goto fail_trunc_wq; 637 638 /* caps */ 639 fsc->min_caps = fsopt->max_readdir; 640 641 return fsc; 642 643 fail_trunc_wq: 644 destroy_workqueue(fsc->trunc_wq); 645 fail_pg_inv_wq: 646 destroy_workqueue(fsc->pg_inv_wq); 647 fail_wb_wq: 648 destroy_workqueue(fsc->wb_wq); 649 fail_client: 650 ceph_destroy_client(fsc->client); 651 fail: 652 kfree(fsc); 653 return ERR_PTR(err); 654 } 655 656 static void destroy_fs_client(struct ceph_fs_client *fsc) 657 { 658 dout("destroy_fs_client %p\n", fsc); 659 660 destroy_workqueue(fsc->wb_wq); 661 destroy_workqueue(fsc->pg_inv_wq); 662 destroy_workqueue(fsc->trunc_wq); 663 664 mempool_destroy(fsc->wb_pagevec_pool); 665 666 destroy_mount_options(fsc->mount_options); 667 668 ceph_destroy_client(fsc->client); 669 670 kfree(fsc); 671 dout("destroy_fs_client %p done\n", fsc); 672 } 673 674 /* 675 * caches 676 */ 677 struct kmem_cache *ceph_inode_cachep; 678 struct kmem_cache *ceph_cap_cachep; 679 struct kmem_cache *ceph_cap_flush_cachep; 680 struct kmem_cache *ceph_dentry_cachep; 681 struct kmem_cache *ceph_file_cachep; 682 683 static void ceph_inode_init_once(void *foo) 684 { 685 struct ceph_inode_info *ci = foo; 686 inode_init_once(&ci->vfs_inode); 687 } 688 689 static int __init init_caches(void) 690 { 691 int error = -ENOMEM; 692 693 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 694 sizeof(struct ceph_inode_info), 695 __alignof__(struct ceph_inode_info), 696 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 697 SLAB_ACCOUNT, ceph_inode_init_once); 698 if (!ceph_inode_cachep) 699 return -ENOMEM; 700 701 ceph_cap_cachep = KMEM_CACHE(ceph_cap, 702 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 703 if (!ceph_cap_cachep) 704 goto bad_cap; 705 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 706 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 707 if (!ceph_cap_flush_cachep) 708 goto bad_cap_flush; 709 710 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 711 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 712 if (!ceph_dentry_cachep) 713 goto bad_dentry; 714 715 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 716 if (!ceph_file_cachep) 717 goto bad_file; 718 719 error = ceph_fscache_register(); 720 if (error) 721 goto bad_fscache; 722 723 return 0; 724 725 bad_fscache: 726 kmem_cache_destroy(ceph_file_cachep); 727 bad_file: 728 kmem_cache_destroy(ceph_dentry_cachep); 729 bad_dentry: 730 kmem_cache_destroy(ceph_cap_flush_cachep); 731 bad_cap_flush: 732 kmem_cache_destroy(ceph_cap_cachep); 733 bad_cap: 734 kmem_cache_destroy(ceph_inode_cachep); 735 return error; 736 } 737 738 static void destroy_caches(void) 739 { 740 /* 741 * Make sure all delayed rcu free inodes are flushed before we 742 * destroy cache. 743 */ 744 rcu_barrier(); 745 746 kmem_cache_destroy(ceph_inode_cachep); 747 kmem_cache_destroy(ceph_cap_cachep); 748 kmem_cache_destroy(ceph_cap_flush_cachep); 749 kmem_cache_destroy(ceph_dentry_cachep); 750 kmem_cache_destroy(ceph_file_cachep); 751 752 ceph_fscache_unregister(); 753 } 754 755 756 /* 757 * ceph_umount_begin - initiate forced umount. Tear down down the 758 * mount, skipping steps that may hang while waiting for server(s). 759 */ 760 static void ceph_umount_begin(struct super_block *sb) 761 { 762 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 763 764 dout("ceph_umount_begin - starting forced umount\n"); 765 if (!fsc) 766 return; 767 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 768 ceph_mdsc_force_umount(fsc->mdsc); 769 return; 770 } 771 772 static const struct super_operations ceph_super_ops = { 773 .alloc_inode = ceph_alloc_inode, 774 .destroy_inode = ceph_destroy_inode, 775 .write_inode = ceph_write_inode, 776 .drop_inode = ceph_drop_inode, 777 .sync_fs = ceph_sync_fs, 778 .put_super = ceph_put_super, 779 .show_options = ceph_show_options, 780 .statfs = ceph_statfs, 781 .umount_begin = ceph_umount_begin, 782 }; 783 784 /* 785 * Bootstrap mount by opening the root directory. Note the mount 786 * @started time from caller, and time out if this takes too long. 787 */ 788 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 789 const char *path, 790 unsigned long started) 791 { 792 struct ceph_mds_client *mdsc = fsc->mdsc; 793 struct ceph_mds_request *req = NULL; 794 int err; 795 struct dentry *root; 796 797 /* open dir */ 798 dout("open_root_inode opening '%s'\n", path); 799 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 800 if (IS_ERR(req)) 801 return ERR_CAST(req); 802 req->r_path1 = kstrdup(path, GFP_NOFS); 803 if (!req->r_path1) { 804 root = ERR_PTR(-ENOMEM); 805 goto out; 806 } 807 808 req->r_ino1.ino = CEPH_INO_ROOT; 809 req->r_ino1.snap = CEPH_NOSNAP; 810 req->r_started = started; 811 req->r_timeout = fsc->client->options->mount_timeout; 812 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 813 req->r_num_caps = 2; 814 err = ceph_mdsc_do_request(mdsc, NULL, req); 815 if (err == 0) { 816 struct inode *inode = req->r_target_inode; 817 req->r_target_inode = NULL; 818 dout("open_root_inode success\n"); 819 root = d_make_root(inode); 820 if (!root) { 821 root = ERR_PTR(-ENOMEM); 822 goto out; 823 } 824 dout("open_root_inode success, root dentry is %p\n", root); 825 } else { 826 root = ERR_PTR(err); 827 } 828 out: 829 ceph_mdsc_put_request(req); 830 return root; 831 } 832 833 834 835 836 /* 837 * mount: join the ceph cluster, and open root directory. 838 */ 839 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 840 { 841 int err; 842 unsigned long started = jiffies; /* note the start time */ 843 struct dentry *root; 844 845 dout("mount start %p\n", fsc); 846 mutex_lock(&fsc->client->mount_mutex); 847 848 if (!fsc->sb->s_root) { 849 const char *path; 850 err = __ceph_open_session(fsc->client, started); 851 if (err < 0) 852 goto out; 853 854 /* setup fscache */ 855 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 856 err = ceph_fscache_register_fs(fsc); 857 if (err < 0) 858 goto out; 859 } 860 861 if (!fsc->mount_options->server_path) { 862 path = ""; 863 dout("mount opening path \\t\n"); 864 } else { 865 path = fsc->mount_options->server_path + 1; 866 dout("mount opening path %s\n", path); 867 } 868 869 err = ceph_fs_debugfs_init(fsc); 870 if (err < 0) 871 goto out; 872 873 root = open_root_dentry(fsc, path, started); 874 if (IS_ERR(root)) { 875 err = PTR_ERR(root); 876 goto out; 877 } 878 fsc->sb->s_root = dget(root); 879 } else { 880 root = dget(fsc->sb->s_root); 881 } 882 883 fsc->mount_state = CEPH_MOUNT_MOUNTED; 884 dout("mount success\n"); 885 mutex_unlock(&fsc->client->mount_mutex); 886 return root; 887 888 out: 889 mutex_unlock(&fsc->client->mount_mutex); 890 return ERR_PTR(err); 891 } 892 893 static int ceph_set_super(struct super_block *s, void *data) 894 { 895 struct ceph_fs_client *fsc = data; 896 int ret; 897 898 dout("set_super %p data %p\n", s, data); 899 900 s->s_flags = fsc->mount_options->sb_flags; 901 s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ 902 903 s->s_xattr = ceph_xattr_handlers; 904 s->s_fs_info = fsc; 905 fsc->sb = s; 906 907 s->s_op = &ceph_super_ops; 908 s->s_d_op = &ceph_dentry_ops; 909 s->s_export_op = &ceph_export_ops; 910 911 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 912 913 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 914 if (ret != 0) 915 goto fail; 916 917 return ret; 918 919 fail: 920 s->s_fs_info = NULL; 921 fsc->sb = NULL; 922 return ret; 923 } 924 925 /* 926 * share superblock if same fs AND options 927 */ 928 static int ceph_compare_super(struct super_block *sb, void *data) 929 { 930 struct ceph_fs_client *new = data; 931 struct ceph_mount_options *fsopt = new->mount_options; 932 struct ceph_options *opt = new->client->options; 933 struct ceph_fs_client *other = ceph_sb_to_client(sb); 934 935 dout("ceph_compare_super %p\n", sb); 936 937 if (compare_mount_options(fsopt, opt, other)) { 938 dout("monitor(s)/mount options don't match\n"); 939 return 0; 940 } 941 if ((opt->flags & CEPH_OPT_FSID) && 942 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 943 dout("fsid doesn't match\n"); 944 return 0; 945 } 946 if (fsopt->sb_flags != other->mount_options->sb_flags) { 947 dout("flags differ\n"); 948 return 0; 949 } 950 return 1; 951 } 952 953 /* 954 * construct our own bdi so we can control readahead, etc. 955 */ 956 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 957 958 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 959 { 960 int err; 961 962 err = super_setup_bdi_name(sb, "ceph-%ld", 963 atomic_long_inc_return(&bdi_seq)); 964 if (err) 965 return err; 966 967 /* set ra_pages based on rasize mount option? */ 968 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 969 970 /* set io_pages based on max osd read size */ 971 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 972 973 return 0; 974 } 975 976 static struct dentry *ceph_mount(struct file_system_type *fs_type, 977 int flags, const char *dev_name, void *data) 978 { 979 struct super_block *sb; 980 struct ceph_fs_client *fsc; 981 struct dentry *res; 982 int err; 983 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 984 struct ceph_mount_options *fsopt = NULL; 985 struct ceph_options *opt = NULL; 986 987 dout("ceph_mount\n"); 988 989 #ifdef CONFIG_CEPH_FS_POSIX_ACL 990 flags |= SB_POSIXACL; 991 #endif 992 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 993 if (err < 0) { 994 res = ERR_PTR(err); 995 goto out_final; 996 } 997 998 /* create client (which we may/may not use) */ 999 fsc = create_fs_client(fsopt, opt); 1000 if (IS_ERR(fsc)) { 1001 res = ERR_CAST(fsc); 1002 destroy_mount_options(fsopt); 1003 ceph_destroy_options(opt); 1004 goto out_final; 1005 } 1006 1007 err = ceph_mdsc_init(fsc); 1008 if (err < 0) { 1009 res = ERR_PTR(err); 1010 goto out; 1011 } 1012 1013 if (ceph_test_opt(fsc->client, NOSHARE)) 1014 compare_super = NULL; 1015 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1016 if (IS_ERR(sb)) { 1017 res = ERR_CAST(sb); 1018 goto out; 1019 } 1020 1021 if (ceph_sb_to_client(sb) != fsc) { 1022 ceph_mdsc_destroy(fsc); 1023 destroy_fs_client(fsc); 1024 fsc = ceph_sb_to_client(sb); 1025 dout("get_sb got existing client %p\n", fsc); 1026 } else { 1027 dout("get_sb using new client %p\n", fsc); 1028 err = ceph_setup_bdi(sb, fsc); 1029 if (err < 0) { 1030 res = ERR_PTR(err); 1031 goto out_splat; 1032 } 1033 } 1034 1035 res = ceph_real_mount(fsc); 1036 if (IS_ERR(res)) 1037 goto out_splat; 1038 dout("root %p inode %p ino %llx.%llx\n", res, 1039 d_inode(res), ceph_vinop(d_inode(res))); 1040 return res; 1041 1042 out_splat: 1043 ceph_mdsc_close_sessions(fsc->mdsc); 1044 deactivate_locked_super(sb); 1045 goto out_final; 1046 1047 out: 1048 ceph_mdsc_destroy(fsc); 1049 destroy_fs_client(fsc); 1050 out_final: 1051 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1052 return res; 1053 } 1054 1055 static void ceph_kill_sb(struct super_block *s) 1056 { 1057 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1058 dev_t dev = s->s_dev; 1059 1060 dout("kill_sb %p\n", s); 1061 1062 ceph_mdsc_pre_umount(fsc->mdsc); 1063 generic_shutdown_super(s); 1064 1065 fsc->client->extra_mon_dispatch = NULL; 1066 ceph_fs_debugfs_cleanup(fsc); 1067 1068 ceph_fscache_unregister_fs(fsc); 1069 1070 ceph_mdsc_destroy(fsc); 1071 1072 destroy_fs_client(fsc); 1073 free_anon_bdev(dev); 1074 } 1075 1076 static struct file_system_type ceph_fs_type = { 1077 .owner = THIS_MODULE, 1078 .name = "ceph", 1079 .mount = ceph_mount, 1080 .kill_sb = ceph_kill_sb, 1081 .fs_flags = FS_RENAME_DOES_D_MOVE, 1082 }; 1083 MODULE_ALIAS_FS("ceph"); 1084 1085 static int __init init_ceph(void) 1086 { 1087 int ret = init_caches(); 1088 if (ret) 1089 goto out; 1090 1091 ceph_flock_init(); 1092 ceph_xattr_init(); 1093 ret = register_filesystem(&ceph_fs_type); 1094 if (ret) 1095 goto out_xattr; 1096 1097 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1098 1099 return 0; 1100 1101 out_xattr: 1102 ceph_xattr_exit(); 1103 destroy_caches(); 1104 out: 1105 return ret; 1106 } 1107 1108 static void __exit exit_ceph(void) 1109 { 1110 dout("exit_ceph\n"); 1111 unregister_filesystem(&ceph_fs_type); 1112 ceph_xattr_exit(); 1113 destroy_caches(); 1114 } 1115 1116 module_init(init_ceph); 1117 module_exit(exit_ceph); 1118 1119 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1120 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1121 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1122 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1123 MODULE_LICENSE("GPL"); 1124