1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 #include "cache.h" 21 22 #include <linux/ceph/ceph_features.h> 23 #include <linux/ceph/decode.h> 24 #include <linux/ceph/mon_client.h> 25 #include <linux/ceph/auth.h> 26 #include <linux/ceph/debugfs.h> 27 28 /* 29 * Ceph superblock operations 30 * 31 * Handle the basics of mounting, unmounting. 32 */ 33 34 /* 35 * super ops 36 */ 37 static void ceph_put_super(struct super_block *s) 38 { 39 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 40 41 dout("put_super\n"); 42 ceph_mdsc_close_sessions(fsc->mdsc); 43 } 44 45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 46 { 47 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 48 struct ceph_mon_client *monc = &fsc->client->monc; 49 struct ceph_statfs st; 50 u64 fsid; 51 int err; 52 u64 data_pool; 53 54 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 55 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 56 } else { 57 data_pool = CEPH_NOPOOL; 58 } 59 60 dout("statfs\n"); 61 err = ceph_monc_do_statfs(monc, data_pool, &st); 62 if (err < 0) 63 return err; 64 65 /* fill in kstatfs */ 66 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 67 68 /* 69 * express utilization in terms of large blocks to avoid 70 * overflow on 32-bit machines. 71 * 72 * NOTE: for the time being, we make bsize == frsize to humor 73 * not-yet-ancient versions of glibc that are broken. 74 * Someday, we will probably want to report a real block 75 * size... whatever that may mean for a network file system! 76 */ 77 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 78 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 79 80 /* 81 * By default use root quota for stats; fallback to overall filesystem 82 * usage if using 'noquotadf' mount option or if the root dir doesn't 83 * have max_bytes quota set. 84 */ 85 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 86 !ceph_quota_update_statfs(fsc, buf)) { 87 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 88 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 89 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 90 } 91 92 buf->f_files = le64_to_cpu(st.num_objects); 93 buf->f_ffree = -1; 94 buf->f_namelen = NAME_MAX; 95 96 /* Must convert the fsid, for consistent values across arches */ 97 mutex_lock(&monc->mutex); 98 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 99 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 100 mutex_unlock(&monc->mutex); 101 102 buf->f_fsid.val[0] = fsid & 0xffffffff; 103 buf->f_fsid.val[1] = fsid >> 32; 104 105 return 0; 106 } 107 108 109 static int ceph_sync_fs(struct super_block *sb, int wait) 110 { 111 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 112 113 if (!wait) { 114 dout("sync_fs (non-blocking)\n"); 115 ceph_flush_dirty_caps(fsc->mdsc); 116 dout("sync_fs (non-blocking) done\n"); 117 return 0; 118 } 119 120 dout("sync_fs (blocking)\n"); 121 ceph_osdc_sync(&fsc->client->osdc); 122 ceph_mdsc_sync(fsc->mdsc); 123 dout("sync_fs (blocking) done\n"); 124 return 0; 125 } 126 127 /* 128 * mount options 129 */ 130 enum { 131 Opt_wsize, 132 Opt_rsize, 133 Opt_rasize, 134 Opt_caps_wanted_delay_min, 135 Opt_caps_wanted_delay_max, 136 Opt_readdir_max_entries, 137 Opt_readdir_max_bytes, 138 Opt_congestion_kb, 139 Opt_last_int, 140 /* int args above */ 141 Opt_snapdirname, 142 Opt_mds_namespace, 143 Opt_fscache_uniq, 144 Opt_last_string, 145 /* string args above */ 146 Opt_dirstat, 147 Opt_nodirstat, 148 Opt_rbytes, 149 Opt_norbytes, 150 Opt_asyncreaddir, 151 Opt_noasyncreaddir, 152 Opt_dcache, 153 Opt_nodcache, 154 Opt_ino32, 155 Opt_noino32, 156 Opt_fscache, 157 Opt_nofscache, 158 Opt_poolperm, 159 Opt_nopoolperm, 160 Opt_require_active_mds, 161 Opt_norequire_active_mds, 162 #ifdef CONFIG_CEPH_FS_POSIX_ACL 163 Opt_acl, 164 #endif 165 Opt_noacl, 166 Opt_quotadf, 167 Opt_noquotadf, 168 }; 169 170 static match_table_t fsopt_tokens = { 171 {Opt_wsize, "wsize=%d"}, 172 {Opt_rsize, "rsize=%d"}, 173 {Opt_rasize, "rasize=%d"}, 174 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 175 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 176 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 177 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 178 {Opt_congestion_kb, "write_congestion_kb=%d"}, 179 /* int args above */ 180 {Opt_snapdirname, "snapdirname=%s"}, 181 {Opt_mds_namespace, "mds_namespace=%s"}, 182 {Opt_fscache_uniq, "fsc=%s"}, 183 /* string args above */ 184 {Opt_dirstat, "dirstat"}, 185 {Opt_nodirstat, "nodirstat"}, 186 {Opt_rbytes, "rbytes"}, 187 {Opt_norbytes, "norbytes"}, 188 {Opt_asyncreaddir, "asyncreaddir"}, 189 {Opt_noasyncreaddir, "noasyncreaddir"}, 190 {Opt_dcache, "dcache"}, 191 {Opt_nodcache, "nodcache"}, 192 {Opt_ino32, "ino32"}, 193 {Opt_noino32, "noino32"}, 194 {Opt_fscache, "fsc"}, 195 {Opt_nofscache, "nofsc"}, 196 {Opt_poolperm, "poolperm"}, 197 {Opt_nopoolperm, "nopoolperm"}, 198 {Opt_require_active_mds, "require_active_mds"}, 199 {Opt_norequire_active_mds, "norequire_active_mds"}, 200 #ifdef CONFIG_CEPH_FS_POSIX_ACL 201 {Opt_acl, "acl"}, 202 #endif 203 {Opt_noacl, "noacl"}, 204 {Opt_quotadf, "quotadf"}, 205 {Opt_noquotadf, "noquotadf"}, 206 {-1, NULL} 207 }; 208 209 static int parse_fsopt_token(char *c, void *private) 210 { 211 struct ceph_mount_options *fsopt = private; 212 substring_t argstr[MAX_OPT_ARGS]; 213 int token, intval, ret; 214 215 token = match_token((char *)c, fsopt_tokens, argstr); 216 if (token < 0) 217 return -EINVAL; 218 219 if (token < Opt_last_int) { 220 ret = match_int(&argstr[0], &intval); 221 if (ret < 0) { 222 pr_err("bad option arg (not int) at '%s'\n", c); 223 return ret; 224 } 225 dout("got int token %d val %d\n", token, intval); 226 } else if (token > Opt_last_int && token < Opt_last_string) { 227 dout("got string token %d val %s\n", token, 228 argstr[0].from); 229 } else { 230 dout("got token %d\n", token); 231 } 232 233 switch (token) { 234 case Opt_snapdirname: 235 kfree(fsopt->snapdir_name); 236 fsopt->snapdir_name = kstrndup(argstr[0].from, 237 argstr[0].to-argstr[0].from, 238 GFP_KERNEL); 239 if (!fsopt->snapdir_name) 240 return -ENOMEM; 241 break; 242 case Opt_mds_namespace: 243 kfree(fsopt->mds_namespace); 244 fsopt->mds_namespace = kstrndup(argstr[0].from, 245 argstr[0].to-argstr[0].from, 246 GFP_KERNEL); 247 if (!fsopt->mds_namespace) 248 return -ENOMEM; 249 break; 250 case Opt_fscache_uniq: 251 kfree(fsopt->fscache_uniq); 252 fsopt->fscache_uniq = kstrndup(argstr[0].from, 253 argstr[0].to-argstr[0].from, 254 GFP_KERNEL); 255 if (!fsopt->fscache_uniq) 256 return -ENOMEM; 257 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 258 break; 259 /* misc */ 260 case Opt_wsize: 261 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 262 return -EINVAL; 263 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 264 break; 265 case Opt_rsize: 266 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 267 return -EINVAL; 268 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 269 break; 270 case Opt_rasize: 271 if (intval < 0) 272 return -EINVAL; 273 fsopt->rasize = ALIGN(intval, PAGE_SIZE); 274 break; 275 case Opt_caps_wanted_delay_min: 276 if (intval < 1) 277 return -EINVAL; 278 fsopt->caps_wanted_delay_min = intval; 279 break; 280 case Opt_caps_wanted_delay_max: 281 if (intval < 1) 282 return -EINVAL; 283 fsopt->caps_wanted_delay_max = intval; 284 break; 285 case Opt_readdir_max_entries: 286 if (intval < 1) 287 return -EINVAL; 288 fsopt->max_readdir = intval; 289 break; 290 case Opt_readdir_max_bytes: 291 if (intval < (int)PAGE_SIZE && intval != 0) 292 return -EINVAL; 293 fsopt->max_readdir_bytes = intval; 294 break; 295 case Opt_congestion_kb: 296 if (intval < 1024) /* at least 1M */ 297 return -EINVAL; 298 fsopt->congestion_kb = intval; 299 break; 300 case Opt_dirstat: 301 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 302 break; 303 case Opt_nodirstat: 304 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 305 break; 306 case Opt_rbytes: 307 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 308 break; 309 case Opt_norbytes: 310 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 311 break; 312 case Opt_asyncreaddir: 313 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 314 break; 315 case Opt_noasyncreaddir: 316 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 317 break; 318 case Opt_dcache: 319 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 320 break; 321 case Opt_nodcache: 322 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 323 break; 324 case Opt_ino32: 325 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 326 break; 327 case Opt_noino32: 328 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 329 break; 330 case Opt_fscache: 331 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 332 kfree(fsopt->fscache_uniq); 333 fsopt->fscache_uniq = NULL; 334 break; 335 case Opt_nofscache: 336 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 337 kfree(fsopt->fscache_uniq); 338 fsopt->fscache_uniq = NULL; 339 break; 340 case Opt_poolperm: 341 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 342 break; 343 case Opt_nopoolperm: 344 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 345 break; 346 case Opt_require_active_mds: 347 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 348 break; 349 case Opt_norequire_active_mds: 350 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 351 break; 352 case Opt_quotadf: 353 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 354 break; 355 case Opt_noquotadf: 356 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 357 break; 358 #ifdef CONFIG_CEPH_FS_POSIX_ACL 359 case Opt_acl: 360 fsopt->sb_flags |= SB_POSIXACL; 361 break; 362 #endif 363 case Opt_noacl: 364 fsopt->sb_flags &= ~SB_POSIXACL; 365 break; 366 default: 367 BUG_ON(token); 368 } 369 return 0; 370 } 371 372 static void destroy_mount_options(struct ceph_mount_options *args) 373 { 374 dout("destroy_mount_options %p\n", args); 375 kfree(args->snapdir_name); 376 kfree(args->mds_namespace); 377 kfree(args->server_path); 378 kfree(args->fscache_uniq); 379 kfree(args); 380 } 381 382 static int strcmp_null(const char *s1, const char *s2) 383 { 384 if (!s1 && !s2) 385 return 0; 386 if (s1 && !s2) 387 return -1; 388 if (!s1 && s2) 389 return 1; 390 return strcmp(s1, s2); 391 } 392 393 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 394 struct ceph_options *new_opt, 395 struct ceph_fs_client *fsc) 396 { 397 struct ceph_mount_options *fsopt1 = new_fsopt; 398 struct ceph_mount_options *fsopt2 = fsc->mount_options; 399 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 400 int ret; 401 402 ret = memcmp(fsopt1, fsopt2, ofs); 403 if (ret) 404 return ret; 405 406 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 407 if (ret) 408 return ret; 409 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 410 if (ret) 411 return ret; 412 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 413 if (ret) 414 return ret; 415 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 416 if (ret) 417 return ret; 418 419 return ceph_compare_options(new_opt, fsc->client); 420 } 421 422 static int parse_mount_options(struct ceph_mount_options **pfsopt, 423 struct ceph_options **popt, 424 int flags, char *options, 425 const char *dev_name) 426 { 427 struct ceph_mount_options *fsopt; 428 const char *dev_name_end; 429 int err; 430 431 if (!dev_name || !*dev_name) 432 return -EINVAL; 433 434 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 435 if (!fsopt) 436 return -ENOMEM; 437 438 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 439 440 fsopt->sb_flags = flags; 441 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 442 443 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 444 fsopt->rsize = CEPH_MAX_READ_SIZE; 445 fsopt->rasize = CEPH_RASIZE_DEFAULT; 446 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 447 if (!fsopt->snapdir_name) { 448 err = -ENOMEM; 449 goto out; 450 } 451 452 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 453 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 454 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 455 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 456 fsopt->congestion_kb = default_congestion_kb(); 457 458 /* 459 * Distinguish the server list from the path in "dev_name". 460 * Internally we do not include the leading '/' in the path. 461 * 462 * "dev_name" will look like: 463 * <server_spec>[,<server_spec>...]:[<path>] 464 * where 465 * <server_spec> is <ip>[:<port>] 466 * <path> is optional, but if present must begin with '/' 467 */ 468 dev_name_end = strchr(dev_name, '/'); 469 if (dev_name_end) { 470 if (strlen(dev_name_end) > 1) { 471 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 472 if (!fsopt->server_path) { 473 err = -ENOMEM; 474 goto out; 475 } 476 } 477 } else { 478 dev_name_end = dev_name + strlen(dev_name); 479 } 480 err = -EINVAL; 481 dev_name_end--; /* back up to ':' separator */ 482 if (dev_name_end < dev_name || *dev_name_end != ':') { 483 pr_err("device name is missing path (no : separator in %s)\n", 484 dev_name); 485 goto out; 486 } 487 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 488 if (fsopt->server_path) 489 dout("server path '%s'\n", fsopt->server_path); 490 491 *popt = ceph_parse_options(options, dev_name, dev_name_end, 492 parse_fsopt_token, (void *)fsopt); 493 if (IS_ERR(*popt)) { 494 err = PTR_ERR(*popt); 495 goto out; 496 } 497 498 /* success */ 499 *pfsopt = fsopt; 500 return 0; 501 502 out: 503 destroy_mount_options(fsopt); 504 return err; 505 } 506 507 /** 508 * ceph_show_options - Show mount options in /proc/mounts 509 * @m: seq_file to write to 510 * @root: root of that (sub)tree 511 */ 512 static int ceph_show_options(struct seq_file *m, struct dentry *root) 513 { 514 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 515 struct ceph_mount_options *fsopt = fsc->mount_options; 516 size_t pos; 517 int ret; 518 519 /* a comma between MNT/MS and client options */ 520 seq_putc(m, ','); 521 pos = m->count; 522 523 ret = ceph_print_client_options(m, fsc->client); 524 if (ret) 525 return ret; 526 527 /* retract our comma if no client options */ 528 if (m->count == pos) 529 m->count--; 530 531 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 532 seq_puts(m, ",dirstat"); 533 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 534 seq_puts(m, ",rbytes"); 535 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 536 seq_puts(m, ",noasyncreaddir"); 537 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 538 seq_puts(m, ",nodcache"); 539 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 540 seq_puts(m, ",ino32"); 541 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 542 seq_show_option(m, "fsc", fsopt->fscache_uniq); 543 } 544 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 545 seq_puts(m, ",nopoolperm"); 546 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 547 seq_puts(m, ",noquotadf"); 548 549 #ifdef CONFIG_CEPH_FS_POSIX_ACL 550 if (fsopt->sb_flags & SB_POSIXACL) 551 seq_puts(m, ",acl"); 552 else 553 seq_puts(m, ",noacl"); 554 #endif 555 556 if (fsopt->mds_namespace) 557 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 558 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 559 seq_printf(m, ",wsize=%d", fsopt->wsize); 560 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 561 seq_printf(m, ",rsize=%d", fsopt->rsize); 562 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 563 seq_printf(m, ",rasize=%d", fsopt->rasize); 564 if (fsopt->congestion_kb != default_congestion_kb()) 565 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 566 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 567 seq_printf(m, ",caps_wanted_delay_min=%d", 568 fsopt->caps_wanted_delay_min); 569 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 570 seq_printf(m, ",caps_wanted_delay_max=%d", 571 fsopt->caps_wanted_delay_max); 572 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 573 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 574 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 575 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 576 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 577 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 578 579 return 0; 580 } 581 582 /* 583 * handle any mon messages the standard library doesn't understand. 584 * return error if we don't either. 585 */ 586 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 587 { 588 struct ceph_fs_client *fsc = client->private; 589 int type = le16_to_cpu(msg->hdr.type); 590 591 switch (type) { 592 case CEPH_MSG_MDS_MAP: 593 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 594 return 0; 595 case CEPH_MSG_FS_MAP_USER: 596 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 597 return 0; 598 default: 599 return -1; 600 } 601 } 602 603 /* 604 * create a new fs client 605 * 606 * Success or not, this function consumes @fsopt and @opt. 607 */ 608 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 609 struct ceph_options *opt) 610 { 611 struct ceph_fs_client *fsc; 612 int page_count; 613 size_t size; 614 int err; 615 616 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 617 if (!fsc) { 618 err = -ENOMEM; 619 goto fail; 620 } 621 622 fsc->client = ceph_create_client(opt, fsc); 623 if (IS_ERR(fsc->client)) { 624 err = PTR_ERR(fsc->client); 625 goto fail; 626 } 627 opt = NULL; /* fsc->client now owns this */ 628 629 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 630 fsc->client->osdc.abort_on_full = true; 631 632 if (!fsopt->mds_namespace) { 633 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 634 0, true); 635 } else { 636 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 637 0, false); 638 } 639 640 fsc->mount_options = fsopt; 641 642 fsc->sb = NULL; 643 fsc->mount_state = CEPH_MOUNT_MOUNTING; 644 645 atomic_long_set(&fsc->writeback_count, 0); 646 647 err = -ENOMEM; 648 /* 649 * The number of concurrent works can be high but they don't need 650 * to be processed in parallel, limit concurrency. 651 */ 652 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 653 if (!fsc->wb_wq) 654 goto fail_client; 655 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 656 if (!fsc->pg_inv_wq) 657 goto fail_wb_wq; 658 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 659 if (!fsc->trunc_wq) 660 goto fail_pg_inv_wq; 661 662 /* set up mempools */ 663 err = -ENOMEM; 664 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 665 size = sizeof (struct page *) * (page_count ? page_count : 1); 666 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 667 if (!fsc->wb_pagevec_pool) 668 goto fail_trunc_wq; 669 670 /* caps */ 671 fsc->min_caps = fsopt->max_readdir; 672 673 return fsc; 674 675 fail_trunc_wq: 676 destroy_workqueue(fsc->trunc_wq); 677 fail_pg_inv_wq: 678 destroy_workqueue(fsc->pg_inv_wq); 679 fail_wb_wq: 680 destroy_workqueue(fsc->wb_wq); 681 fail_client: 682 ceph_destroy_client(fsc->client); 683 fail: 684 kfree(fsc); 685 if (opt) 686 ceph_destroy_options(opt); 687 destroy_mount_options(fsopt); 688 return ERR_PTR(err); 689 } 690 691 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 692 { 693 flush_workqueue(fsc->wb_wq); 694 flush_workqueue(fsc->pg_inv_wq); 695 flush_workqueue(fsc->trunc_wq); 696 } 697 698 static void destroy_fs_client(struct ceph_fs_client *fsc) 699 { 700 dout("destroy_fs_client %p\n", fsc); 701 702 destroy_workqueue(fsc->wb_wq); 703 destroy_workqueue(fsc->pg_inv_wq); 704 destroy_workqueue(fsc->trunc_wq); 705 706 mempool_destroy(fsc->wb_pagevec_pool); 707 708 destroy_mount_options(fsc->mount_options); 709 710 ceph_destroy_client(fsc->client); 711 712 kfree(fsc); 713 dout("destroy_fs_client %p done\n", fsc); 714 } 715 716 /* 717 * caches 718 */ 719 struct kmem_cache *ceph_inode_cachep; 720 struct kmem_cache *ceph_cap_cachep; 721 struct kmem_cache *ceph_cap_flush_cachep; 722 struct kmem_cache *ceph_dentry_cachep; 723 struct kmem_cache *ceph_file_cachep; 724 struct kmem_cache *ceph_dir_file_cachep; 725 726 static void ceph_inode_init_once(void *foo) 727 { 728 struct ceph_inode_info *ci = foo; 729 inode_init_once(&ci->vfs_inode); 730 } 731 732 static int __init init_caches(void) 733 { 734 int error = -ENOMEM; 735 736 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 737 sizeof(struct ceph_inode_info), 738 __alignof__(struct ceph_inode_info), 739 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 740 SLAB_ACCOUNT, ceph_inode_init_once); 741 if (!ceph_inode_cachep) 742 return -ENOMEM; 743 744 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 745 if (!ceph_cap_cachep) 746 goto bad_cap; 747 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 748 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 749 if (!ceph_cap_flush_cachep) 750 goto bad_cap_flush; 751 752 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 753 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 754 if (!ceph_dentry_cachep) 755 goto bad_dentry; 756 757 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 758 if (!ceph_file_cachep) 759 goto bad_file; 760 761 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 762 if (!ceph_dir_file_cachep) 763 goto bad_dir_file; 764 765 error = ceph_fscache_register(); 766 if (error) 767 goto bad_fscache; 768 769 return 0; 770 771 bad_fscache: 772 kmem_cache_destroy(ceph_dir_file_cachep); 773 bad_dir_file: 774 kmem_cache_destroy(ceph_file_cachep); 775 bad_file: 776 kmem_cache_destroy(ceph_dentry_cachep); 777 bad_dentry: 778 kmem_cache_destroy(ceph_cap_flush_cachep); 779 bad_cap_flush: 780 kmem_cache_destroy(ceph_cap_cachep); 781 bad_cap: 782 kmem_cache_destroy(ceph_inode_cachep); 783 return error; 784 } 785 786 static void destroy_caches(void) 787 { 788 /* 789 * Make sure all delayed rcu free inodes are flushed before we 790 * destroy cache. 791 */ 792 rcu_barrier(); 793 794 kmem_cache_destroy(ceph_inode_cachep); 795 kmem_cache_destroy(ceph_cap_cachep); 796 kmem_cache_destroy(ceph_cap_flush_cachep); 797 kmem_cache_destroy(ceph_dentry_cachep); 798 kmem_cache_destroy(ceph_file_cachep); 799 kmem_cache_destroy(ceph_dir_file_cachep); 800 801 ceph_fscache_unregister(); 802 } 803 804 805 /* 806 * ceph_umount_begin - initiate forced umount. Tear down down the 807 * mount, skipping steps that may hang while waiting for server(s). 808 */ 809 static void ceph_umount_begin(struct super_block *sb) 810 { 811 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 812 813 dout("ceph_umount_begin - starting forced umount\n"); 814 if (!fsc) 815 return; 816 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 817 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 818 ceph_mdsc_force_umount(fsc->mdsc); 819 return; 820 } 821 822 static const struct super_operations ceph_super_ops = { 823 .alloc_inode = ceph_alloc_inode, 824 .destroy_inode = ceph_destroy_inode, 825 .write_inode = ceph_write_inode, 826 .drop_inode = ceph_drop_inode, 827 .sync_fs = ceph_sync_fs, 828 .put_super = ceph_put_super, 829 .show_options = ceph_show_options, 830 .statfs = ceph_statfs, 831 .umount_begin = ceph_umount_begin, 832 }; 833 834 /* 835 * Bootstrap mount by opening the root directory. Note the mount 836 * @started time from caller, and time out if this takes too long. 837 */ 838 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 839 const char *path, 840 unsigned long started) 841 { 842 struct ceph_mds_client *mdsc = fsc->mdsc; 843 struct ceph_mds_request *req = NULL; 844 int err; 845 struct dentry *root; 846 847 /* open dir */ 848 dout("open_root_inode opening '%s'\n", path); 849 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 850 if (IS_ERR(req)) 851 return ERR_CAST(req); 852 req->r_path1 = kstrdup(path, GFP_NOFS); 853 if (!req->r_path1) { 854 root = ERR_PTR(-ENOMEM); 855 goto out; 856 } 857 858 req->r_ino1.ino = CEPH_INO_ROOT; 859 req->r_ino1.snap = CEPH_NOSNAP; 860 req->r_started = started; 861 req->r_timeout = fsc->client->options->mount_timeout; 862 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 863 req->r_num_caps = 2; 864 err = ceph_mdsc_do_request(mdsc, NULL, req); 865 if (err == 0) { 866 struct inode *inode = req->r_target_inode; 867 req->r_target_inode = NULL; 868 dout("open_root_inode success\n"); 869 root = d_make_root(inode); 870 if (!root) { 871 root = ERR_PTR(-ENOMEM); 872 goto out; 873 } 874 dout("open_root_inode success, root dentry is %p\n", root); 875 } else { 876 root = ERR_PTR(err); 877 } 878 out: 879 ceph_mdsc_put_request(req); 880 return root; 881 } 882 883 884 885 886 /* 887 * mount: join the ceph cluster, and open root directory. 888 */ 889 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 890 { 891 int err; 892 unsigned long started = jiffies; /* note the start time */ 893 struct dentry *root; 894 895 dout("mount start %p\n", fsc); 896 mutex_lock(&fsc->client->mount_mutex); 897 898 if (!fsc->sb->s_root) { 899 const char *path; 900 err = __ceph_open_session(fsc->client, started); 901 if (err < 0) 902 goto out; 903 904 /* setup fscache */ 905 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 906 err = ceph_fscache_register_fs(fsc); 907 if (err < 0) 908 goto out; 909 } 910 911 if (!fsc->mount_options->server_path) { 912 path = ""; 913 dout("mount opening path \\t\n"); 914 } else { 915 path = fsc->mount_options->server_path + 1; 916 dout("mount opening path %s\n", path); 917 } 918 919 err = ceph_fs_debugfs_init(fsc); 920 if (err < 0) 921 goto out; 922 923 root = open_root_dentry(fsc, path, started); 924 if (IS_ERR(root)) { 925 err = PTR_ERR(root); 926 goto out; 927 } 928 fsc->sb->s_root = dget(root); 929 } else { 930 root = dget(fsc->sb->s_root); 931 } 932 933 fsc->mount_state = CEPH_MOUNT_MOUNTED; 934 dout("mount success\n"); 935 mutex_unlock(&fsc->client->mount_mutex); 936 return root; 937 938 out: 939 mutex_unlock(&fsc->client->mount_mutex); 940 return ERR_PTR(err); 941 } 942 943 static int ceph_set_super(struct super_block *s, void *data) 944 { 945 struct ceph_fs_client *fsc = data; 946 int ret; 947 948 dout("set_super %p data %p\n", s, data); 949 950 s->s_flags = fsc->mount_options->sb_flags; 951 s->s_maxbytes = MAX_LFS_FILESIZE; 952 953 s->s_xattr = ceph_xattr_handlers; 954 s->s_fs_info = fsc; 955 fsc->sb = s; 956 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 957 958 s->s_op = &ceph_super_ops; 959 s->s_d_op = &ceph_dentry_ops; 960 s->s_export_op = &ceph_export_ops; 961 962 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 963 964 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 965 if (ret != 0) 966 goto fail; 967 968 return ret; 969 970 fail: 971 s->s_fs_info = NULL; 972 fsc->sb = NULL; 973 return ret; 974 } 975 976 /* 977 * share superblock if same fs AND options 978 */ 979 static int ceph_compare_super(struct super_block *sb, void *data) 980 { 981 struct ceph_fs_client *new = data; 982 struct ceph_mount_options *fsopt = new->mount_options; 983 struct ceph_options *opt = new->client->options; 984 struct ceph_fs_client *other = ceph_sb_to_client(sb); 985 986 dout("ceph_compare_super %p\n", sb); 987 988 if (compare_mount_options(fsopt, opt, other)) { 989 dout("monitor(s)/mount options don't match\n"); 990 return 0; 991 } 992 if ((opt->flags & CEPH_OPT_FSID) && 993 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 994 dout("fsid doesn't match\n"); 995 return 0; 996 } 997 if (fsopt->sb_flags != other->mount_options->sb_flags) { 998 dout("flags differ\n"); 999 return 0; 1000 } 1001 return 1; 1002 } 1003 1004 /* 1005 * construct our own bdi so we can control readahead, etc. 1006 */ 1007 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1008 1009 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1010 { 1011 int err; 1012 1013 err = super_setup_bdi_name(sb, "ceph-%ld", 1014 atomic_long_inc_return(&bdi_seq)); 1015 if (err) 1016 return err; 1017 1018 /* set ra_pages based on rasize mount option? */ 1019 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1020 1021 /* set io_pages based on max osd read size */ 1022 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1023 1024 return 0; 1025 } 1026 1027 static struct dentry *ceph_mount(struct file_system_type *fs_type, 1028 int flags, const char *dev_name, void *data) 1029 { 1030 struct super_block *sb; 1031 struct ceph_fs_client *fsc; 1032 struct dentry *res; 1033 int err; 1034 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 1035 struct ceph_mount_options *fsopt = NULL; 1036 struct ceph_options *opt = NULL; 1037 1038 dout("ceph_mount\n"); 1039 1040 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1041 flags |= SB_POSIXACL; 1042 #endif 1043 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 1044 if (err < 0) { 1045 res = ERR_PTR(err); 1046 goto out_final; 1047 } 1048 1049 /* create client (which we may/may not use) */ 1050 fsc = create_fs_client(fsopt, opt); 1051 if (IS_ERR(fsc)) { 1052 res = ERR_CAST(fsc); 1053 goto out_final; 1054 } 1055 1056 err = ceph_mdsc_init(fsc); 1057 if (err < 0) { 1058 res = ERR_PTR(err); 1059 goto out; 1060 } 1061 1062 if (ceph_test_opt(fsc->client, NOSHARE)) 1063 compare_super = NULL; 1064 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1065 if (IS_ERR(sb)) { 1066 res = ERR_CAST(sb); 1067 goto out; 1068 } 1069 1070 if (ceph_sb_to_client(sb) != fsc) { 1071 ceph_mdsc_destroy(fsc); 1072 destroy_fs_client(fsc); 1073 fsc = ceph_sb_to_client(sb); 1074 dout("get_sb got existing client %p\n", fsc); 1075 } else { 1076 dout("get_sb using new client %p\n", fsc); 1077 err = ceph_setup_bdi(sb, fsc); 1078 if (err < 0) { 1079 res = ERR_PTR(err); 1080 goto out_splat; 1081 } 1082 } 1083 1084 res = ceph_real_mount(fsc); 1085 if (IS_ERR(res)) 1086 goto out_splat; 1087 dout("root %p inode %p ino %llx.%llx\n", res, 1088 d_inode(res), ceph_vinop(d_inode(res))); 1089 return res; 1090 1091 out_splat: 1092 ceph_mdsc_close_sessions(fsc->mdsc); 1093 deactivate_locked_super(sb); 1094 goto out_final; 1095 1096 out: 1097 ceph_mdsc_destroy(fsc); 1098 destroy_fs_client(fsc); 1099 out_final: 1100 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1101 return res; 1102 } 1103 1104 static void ceph_kill_sb(struct super_block *s) 1105 { 1106 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1107 dev_t dev = s->s_dev; 1108 1109 dout("kill_sb %p\n", s); 1110 1111 ceph_mdsc_pre_umount(fsc->mdsc); 1112 flush_fs_workqueues(fsc); 1113 1114 generic_shutdown_super(s); 1115 1116 fsc->client->extra_mon_dispatch = NULL; 1117 ceph_fs_debugfs_cleanup(fsc); 1118 1119 ceph_fscache_unregister_fs(fsc); 1120 1121 ceph_mdsc_destroy(fsc); 1122 1123 destroy_fs_client(fsc); 1124 free_anon_bdev(dev); 1125 } 1126 1127 static struct file_system_type ceph_fs_type = { 1128 .owner = THIS_MODULE, 1129 .name = "ceph", 1130 .mount = ceph_mount, 1131 .kill_sb = ceph_kill_sb, 1132 .fs_flags = FS_RENAME_DOES_D_MOVE, 1133 }; 1134 MODULE_ALIAS_FS("ceph"); 1135 1136 static int __init init_ceph(void) 1137 { 1138 int ret = init_caches(); 1139 if (ret) 1140 goto out; 1141 1142 ceph_flock_init(); 1143 ceph_xattr_init(); 1144 ret = register_filesystem(&ceph_fs_type); 1145 if (ret) 1146 goto out_xattr; 1147 1148 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1149 1150 return 0; 1151 1152 out_xattr: 1153 ceph_xattr_exit(); 1154 destroy_caches(); 1155 out: 1156 return ret; 1157 } 1158 1159 static void __exit exit_ceph(void) 1160 { 1161 dout("exit_ceph\n"); 1162 unregister_filesystem(&ceph_fs_type); 1163 ceph_xattr_exit(); 1164 destroy_caches(); 1165 } 1166 1167 module_init(init_ceph); 1168 module_exit(exit_ceph); 1169 1170 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1171 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1172 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1173 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1174 MODULE_LICENSE("GPL"); 1175