1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 #include <uapi/linux/magic.h> 31 32 static DEFINE_SPINLOCK(ceph_fsc_lock); 33 static LIST_HEAD(ceph_fsc_list); 34 35 /* 36 * Ceph superblock operations 37 * 38 * Handle the basics of mounting, unmounting. 39 */ 40 41 /* 42 * super ops 43 */ 44 static void ceph_put_super(struct super_block *s) 45 { 46 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 47 48 dout("put_super\n"); 49 ceph_mdsc_close_sessions(fsc->mdsc); 50 } 51 52 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 53 { 54 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 55 struct ceph_mon_client *monc = &fsc->client->monc; 56 struct ceph_statfs st; 57 int i, err; 58 u64 data_pool; 59 60 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 61 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 62 } else { 63 data_pool = CEPH_NOPOOL; 64 } 65 66 dout("statfs\n"); 67 err = ceph_monc_do_statfs(monc, data_pool, &st); 68 if (err < 0) 69 return err; 70 71 /* fill in kstatfs */ 72 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 73 74 /* 75 * express utilization in terms of large blocks to avoid 76 * overflow on 32-bit machines. 77 * 78 * NOTE: for the time being, we make bsize == frsize to humor 79 * not-yet-ancient versions of glibc that are broken. 80 * Someday, we will probably want to report a real block 81 * size... whatever that may mean for a network file system! 82 */ 83 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 84 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 85 86 /* 87 * By default use root quota for stats; fallback to overall filesystem 88 * usage if using 'noquotadf' mount option or if the root dir doesn't 89 * have max_bytes quota set. 90 */ 91 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 92 !ceph_quota_update_statfs(fsc, buf)) { 93 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 94 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 95 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 96 } 97 98 buf->f_files = le64_to_cpu(st.num_objects); 99 buf->f_ffree = -1; 100 buf->f_namelen = NAME_MAX; 101 102 /* Must convert the fsid, for consistent values across arches */ 103 buf->f_fsid.val[0] = 0; 104 mutex_lock(&monc->mutex); 105 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 106 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 107 mutex_unlock(&monc->mutex); 108 109 /* fold the fs_cluster_id into the upper bits */ 110 buf->f_fsid.val[1] = monc->fs_cluster_id; 111 112 return 0; 113 } 114 115 static int ceph_sync_fs(struct super_block *sb, int wait) 116 { 117 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 118 119 if (!wait) { 120 dout("sync_fs (non-blocking)\n"); 121 ceph_flush_dirty_caps(fsc->mdsc); 122 dout("sync_fs (non-blocking) done\n"); 123 return 0; 124 } 125 126 dout("sync_fs (blocking)\n"); 127 ceph_osdc_sync(&fsc->client->osdc); 128 ceph_mdsc_sync(fsc->mdsc); 129 dout("sync_fs (blocking) done\n"); 130 return 0; 131 } 132 133 /* 134 * mount options 135 */ 136 enum { 137 Opt_wsize, 138 Opt_rsize, 139 Opt_rasize, 140 Opt_caps_wanted_delay_min, 141 Opt_caps_wanted_delay_max, 142 Opt_caps_max, 143 Opt_readdir_max_entries, 144 Opt_readdir_max_bytes, 145 Opt_congestion_kb, 146 /* int args above */ 147 Opt_snapdirname, 148 Opt_mds_namespace, 149 Opt_recover_session, 150 Opt_source, 151 Opt_mon_addr, 152 /* string args above */ 153 Opt_dirstat, 154 Opt_rbytes, 155 Opt_asyncreaddir, 156 Opt_dcache, 157 Opt_ino32, 158 Opt_fscache, 159 Opt_poolperm, 160 Opt_require_active_mds, 161 Opt_acl, 162 Opt_quotadf, 163 Opt_copyfrom, 164 Opt_wsync, 165 Opt_pagecache, 166 }; 167 168 enum ceph_recover_session_mode { 169 ceph_recover_session_no, 170 ceph_recover_session_clean 171 }; 172 173 static const struct constant_table ceph_param_recover[] = { 174 { "no", ceph_recover_session_no }, 175 { "clean", ceph_recover_session_clean }, 176 {} 177 }; 178 179 static const struct fs_parameter_spec ceph_mount_parameters[] = { 180 fsparam_flag_no ("acl", Opt_acl), 181 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 182 fsparam_s32 ("caps_max", Opt_caps_max), 183 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 184 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 185 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 186 fsparam_flag_no ("copyfrom", Opt_copyfrom), 187 fsparam_flag_no ("dcache", Opt_dcache), 188 fsparam_flag_no ("dirstat", Opt_dirstat), 189 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 190 fsparam_string ("fsc", Opt_fscache), // fsc=... 191 fsparam_flag_no ("ino32", Opt_ino32), 192 fsparam_string ("mds_namespace", Opt_mds_namespace), 193 fsparam_flag_no ("poolperm", Opt_poolperm), 194 fsparam_flag_no ("quotadf", Opt_quotadf), 195 fsparam_u32 ("rasize", Opt_rasize), 196 fsparam_flag_no ("rbytes", Opt_rbytes), 197 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 198 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 199 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 200 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 201 fsparam_u32 ("rsize", Opt_rsize), 202 fsparam_string ("snapdirname", Opt_snapdirname), 203 fsparam_string ("source", Opt_source), 204 fsparam_string ("mon_addr", Opt_mon_addr), 205 fsparam_u32 ("wsize", Opt_wsize), 206 fsparam_flag_no ("wsync", Opt_wsync), 207 fsparam_flag_no ("pagecache", Opt_pagecache), 208 {} 209 }; 210 211 struct ceph_parse_opts_ctx { 212 struct ceph_options *copts; 213 struct ceph_mount_options *opts; 214 }; 215 216 /* 217 * Remove adjacent slashes and then the trailing slash, unless it is 218 * the only remaining character. 219 * 220 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 221 */ 222 static void canonicalize_path(char *path) 223 { 224 int i, j = 0; 225 226 for (i = 0; path[i] != '\0'; i++) { 227 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 228 path[j++] = path[i]; 229 } 230 231 if (j > 1 && path[j - 1] == '/') 232 j--; 233 path[j] = '\0'; 234 } 235 236 /* 237 * Check if the mds namespace in ceph_mount_options matches 238 * the passed in namespace string. First time match (when 239 * ->mds_namespace is NULL) is treated specially, since 240 * ->mds_namespace needs to be initialized by the caller. 241 */ 242 static int namespace_equals(struct ceph_mount_options *fsopt, 243 const char *namespace, size_t len) 244 { 245 return !(fsopt->mds_namespace && 246 (strlen(fsopt->mds_namespace) != len || 247 strncmp(fsopt->mds_namespace, namespace, len))); 248 } 249 250 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 251 struct fs_context *fc) 252 { 253 int r; 254 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 255 struct ceph_mount_options *fsopt = pctx->opts; 256 257 if (*dev_name_end != ':') 258 return invalfc(fc, "separator ':' missing in source"); 259 260 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 261 pctx->copts, fc->log.log, ','); 262 if (r) 263 return r; 264 265 fsopt->new_dev_syntax = false; 266 return 0; 267 } 268 269 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 270 struct fs_context *fc) 271 { 272 size_t len; 273 struct ceph_fsid fsid; 274 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 275 struct ceph_mount_options *fsopt = pctx->opts; 276 char *fsid_start, *fs_name_start; 277 278 if (*dev_name_end != '=') { 279 dout("separator '=' missing in source"); 280 return -EINVAL; 281 } 282 283 fsid_start = strchr(dev_name, '@'); 284 if (!fsid_start) 285 return invalfc(fc, "missing cluster fsid"); 286 ++fsid_start; /* start of cluster fsid */ 287 288 fs_name_start = strchr(fsid_start, '.'); 289 if (!fs_name_start) 290 return invalfc(fc, "missing file system name"); 291 292 if (ceph_parse_fsid(fsid_start, &fsid)) 293 return invalfc(fc, "Invalid FSID"); 294 295 ++fs_name_start; /* start of file system name */ 296 len = dev_name_end - fs_name_start; 297 298 if (!namespace_equals(fsopt, fs_name_start, len)) 299 return invalfc(fc, "Mismatching mds_namespace"); 300 kfree(fsopt->mds_namespace); 301 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 302 if (!fsopt->mds_namespace) 303 return -ENOMEM; 304 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 305 306 fsopt->new_dev_syntax = true; 307 return 0; 308 } 309 310 /* 311 * Parse the source parameter for new device format. Distinguish the device 312 * spec from the path. Try parsing new device format and fallback to old 313 * format if needed. 314 * 315 * New device syntax will looks like: 316 * <device_spec>=/<path> 317 * where 318 * <device_spec> is name@fsid.fsname 319 * <path> is optional, but if present must begin with '/' 320 * (monitor addresses are passed via mount option) 321 * 322 * Old device syntax is: 323 * <server_spec>[,<server_spec>...]:[<path>] 324 * where 325 * <server_spec> is <ip>[:<port>] 326 * <path> is optional, but if present must begin with '/' 327 */ 328 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 329 { 330 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 331 struct ceph_mount_options *fsopt = pctx->opts; 332 char *dev_name = param->string, *dev_name_end; 333 int ret; 334 335 dout("%s '%s'\n", __func__, dev_name); 336 if (!dev_name || !*dev_name) 337 return invalfc(fc, "Empty source"); 338 339 dev_name_end = strchr(dev_name, '/'); 340 if (dev_name_end) { 341 /* 342 * The server_path will include the whole chars from userland 343 * including the leading '/'. 344 */ 345 kfree(fsopt->server_path); 346 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 347 if (!fsopt->server_path) 348 return -ENOMEM; 349 350 canonicalize_path(fsopt->server_path); 351 } else { 352 dev_name_end = dev_name + strlen(dev_name); 353 } 354 355 dev_name_end--; /* back up to separator */ 356 if (dev_name_end < dev_name) 357 return invalfc(fc, "Path missing in source"); 358 359 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 360 if (fsopt->server_path) 361 dout("server path '%s'\n", fsopt->server_path); 362 363 dout("trying new device syntax"); 364 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 365 if (ret) { 366 if (ret != -EINVAL) 367 return ret; 368 dout("trying old device syntax"); 369 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 370 if (ret) 371 return ret; 372 } 373 374 fc->source = param->string; 375 param->string = NULL; 376 return 0; 377 } 378 379 static int ceph_parse_mon_addr(struct fs_parameter *param, 380 struct fs_context *fc) 381 { 382 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 383 struct ceph_mount_options *fsopt = pctx->opts; 384 385 kfree(fsopt->mon_addr); 386 fsopt->mon_addr = param->string; 387 param->string = NULL; 388 389 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 390 pctx->copts, fc->log.log, '/'); 391 } 392 393 static int ceph_parse_mount_param(struct fs_context *fc, 394 struct fs_parameter *param) 395 { 396 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 397 struct ceph_mount_options *fsopt = pctx->opts; 398 struct fs_parse_result result; 399 unsigned int mode; 400 int token, ret; 401 402 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 403 if (ret != -ENOPARAM) 404 return ret; 405 406 token = fs_parse(fc, ceph_mount_parameters, param, &result); 407 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 408 if (token < 0) 409 return token; 410 411 switch (token) { 412 case Opt_snapdirname: 413 kfree(fsopt->snapdir_name); 414 fsopt->snapdir_name = param->string; 415 param->string = NULL; 416 break; 417 case Opt_mds_namespace: 418 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 419 return invalfc(fc, "Mismatching mds_namespace"); 420 kfree(fsopt->mds_namespace); 421 fsopt->mds_namespace = param->string; 422 param->string = NULL; 423 break; 424 case Opt_recover_session: 425 mode = result.uint_32; 426 if (mode == ceph_recover_session_no) 427 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 428 else if (mode == ceph_recover_session_clean) 429 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 430 else 431 BUG(); 432 break; 433 case Opt_source: 434 if (fc->source) 435 return invalfc(fc, "Multiple sources specified"); 436 return ceph_parse_source(param, fc); 437 case Opt_mon_addr: 438 return ceph_parse_mon_addr(param, fc); 439 case Opt_wsize: 440 if (result.uint_32 < PAGE_SIZE || 441 result.uint_32 > CEPH_MAX_WRITE_SIZE) 442 goto out_of_range; 443 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 444 break; 445 case Opt_rsize: 446 if (result.uint_32 < PAGE_SIZE || 447 result.uint_32 > CEPH_MAX_READ_SIZE) 448 goto out_of_range; 449 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 450 break; 451 case Opt_rasize: 452 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_caps_wanted_delay_min: 455 if (result.uint_32 < 1) 456 goto out_of_range; 457 fsopt->caps_wanted_delay_min = result.uint_32; 458 break; 459 case Opt_caps_wanted_delay_max: 460 if (result.uint_32 < 1) 461 goto out_of_range; 462 fsopt->caps_wanted_delay_max = result.uint_32; 463 break; 464 case Opt_caps_max: 465 if (result.int_32 < 0) 466 goto out_of_range; 467 fsopt->caps_max = result.int_32; 468 break; 469 case Opt_readdir_max_entries: 470 if (result.uint_32 < 1) 471 goto out_of_range; 472 fsopt->max_readdir = result.uint_32; 473 break; 474 case Opt_readdir_max_bytes: 475 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 476 goto out_of_range; 477 fsopt->max_readdir_bytes = result.uint_32; 478 break; 479 case Opt_congestion_kb: 480 if (result.uint_32 < 1024) /* at least 1M */ 481 goto out_of_range; 482 fsopt->congestion_kb = result.uint_32; 483 break; 484 case Opt_dirstat: 485 if (!result.negated) 486 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 487 else 488 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 489 break; 490 case Opt_rbytes: 491 if (!result.negated) 492 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 493 else 494 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 495 break; 496 case Opt_asyncreaddir: 497 if (!result.negated) 498 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 499 else 500 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 501 break; 502 case Opt_dcache: 503 if (!result.negated) 504 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 505 else 506 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 507 break; 508 case Opt_ino32: 509 if (!result.negated) 510 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 511 else 512 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 513 break; 514 515 case Opt_fscache: 516 #ifdef CONFIG_CEPH_FSCACHE 517 kfree(fsopt->fscache_uniq); 518 fsopt->fscache_uniq = NULL; 519 if (result.negated) { 520 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 521 } else { 522 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 523 fsopt->fscache_uniq = param->string; 524 param->string = NULL; 525 } 526 break; 527 #else 528 return invalfc(fc, "fscache support is disabled"); 529 #endif 530 case Opt_poolperm: 531 if (!result.negated) 532 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 533 else 534 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 535 break; 536 case Opt_require_active_mds: 537 if (!result.negated) 538 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 539 else 540 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 541 break; 542 case Opt_quotadf: 543 if (!result.negated) 544 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 545 else 546 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 547 break; 548 case Opt_copyfrom: 549 if (!result.negated) 550 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 551 else 552 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 553 break; 554 case Opt_acl: 555 if (!result.negated) { 556 #ifdef CONFIG_CEPH_FS_POSIX_ACL 557 fc->sb_flags |= SB_POSIXACL; 558 #else 559 return invalfc(fc, "POSIX ACL support is disabled"); 560 #endif 561 } else { 562 fc->sb_flags &= ~SB_POSIXACL; 563 } 564 break; 565 case Opt_wsync: 566 if (!result.negated) 567 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 568 else 569 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 570 break; 571 case Opt_pagecache: 572 if (result.negated) 573 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 574 else 575 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 576 break; 577 default: 578 BUG(); 579 } 580 return 0; 581 582 out_of_range: 583 return invalfc(fc, "%s out of range", param->key); 584 } 585 586 static void destroy_mount_options(struct ceph_mount_options *args) 587 { 588 dout("destroy_mount_options %p\n", args); 589 if (!args) 590 return; 591 592 kfree(args->snapdir_name); 593 kfree(args->mds_namespace); 594 kfree(args->server_path); 595 kfree(args->fscache_uniq); 596 kfree(args->mon_addr); 597 kfree(args); 598 } 599 600 static int strcmp_null(const char *s1, const char *s2) 601 { 602 if (!s1 && !s2) 603 return 0; 604 if (s1 && !s2) 605 return -1; 606 if (!s1 && s2) 607 return 1; 608 return strcmp(s1, s2); 609 } 610 611 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 612 struct ceph_options *new_opt, 613 struct ceph_fs_client *fsc) 614 { 615 struct ceph_mount_options *fsopt1 = new_fsopt; 616 struct ceph_mount_options *fsopt2 = fsc->mount_options; 617 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 618 int ret; 619 620 ret = memcmp(fsopt1, fsopt2, ofs); 621 if (ret) 622 return ret; 623 624 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 625 if (ret) 626 return ret; 627 628 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 629 if (ret) 630 return ret; 631 632 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 633 if (ret) 634 return ret; 635 636 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 637 if (ret) 638 return ret; 639 640 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 641 if (ret) 642 return ret; 643 644 return ceph_compare_options(new_opt, fsc->client); 645 } 646 647 /** 648 * ceph_show_options - Show mount options in /proc/mounts 649 * @m: seq_file to write to 650 * @root: root of that (sub)tree 651 */ 652 static int ceph_show_options(struct seq_file *m, struct dentry *root) 653 { 654 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 655 struct ceph_mount_options *fsopt = fsc->mount_options; 656 size_t pos; 657 int ret; 658 659 /* a comma between MNT/MS and client options */ 660 seq_putc(m, ','); 661 pos = m->count; 662 663 ret = ceph_print_client_options(m, fsc->client, false); 664 if (ret) 665 return ret; 666 667 /* retract our comma if no client options */ 668 if (m->count == pos) 669 m->count--; 670 671 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 672 seq_puts(m, ",dirstat"); 673 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 674 seq_puts(m, ",rbytes"); 675 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 676 seq_puts(m, ",noasyncreaddir"); 677 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 678 seq_puts(m, ",nodcache"); 679 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 680 seq_puts(m, ",ino32"); 681 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 682 seq_show_option(m, "fsc", fsopt->fscache_uniq); 683 } 684 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 685 seq_puts(m, ",nopoolperm"); 686 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 687 seq_puts(m, ",noquotadf"); 688 689 #ifdef CONFIG_CEPH_FS_POSIX_ACL 690 if (root->d_sb->s_flags & SB_POSIXACL) 691 seq_puts(m, ",acl"); 692 else 693 seq_puts(m, ",noacl"); 694 #endif 695 696 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 697 seq_puts(m, ",copyfrom"); 698 699 /* dump mds_namespace when old device syntax is in use */ 700 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 701 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 702 703 if (fsopt->mon_addr) 704 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 705 706 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 707 seq_show_option(m, "recover_session", "clean"); 708 709 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 710 seq_puts(m, ",wsync"); 711 712 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 713 seq_puts(m, ",nopagecache"); 714 715 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 716 seq_printf(m, ",wsize=%u", fsopt->wsize); 717 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 718 seq_printf(m, ",rsize=%u", fsopt->rsize); 719 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 720 seq_printf(m, ",rasize=%u", fsopt->rasize); 721 if (fsopt->congestion_kb != default_congestion_kb()) 722 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 723 if (fsopt->caps_max) 724 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 725 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 726 seq_printf(m, ",caps_wanted_delay_min=%u", 727 fsopt->caps_wanted_delay_min); 728 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 729 seq_printf(m, ",caps_wanted_delay_max=%u", 730 fsopt->caps_wanted_delay_max); 731 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 732 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 733 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 734 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 735 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 736 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 737 738 return 0; 739 } 740 741 /* 742 * handle any mon messages the standard library doesn't understand. 743 * return error if we don't either. 744 */ 745 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 746 { 747 struct ceph_fs_client *fsc = client->private; 748 int type = le16_to_cpu(msg->hdr.type); 749 750 switch (type) { 751 case CEPH_MSG_MDS_MAP: 752 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 753 return 0; 754 case CEPH_MSG_FS_MAP_USER: 755 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 756 return 0; 757 default: 758 return -1; 759 } 760 } 761 762 /* 763 * create a new fs client 764 * 765 * Success or not, this function consumes @fsopt and @opt. 766 */ 767 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 768 struct ceph_options *opt) 769 { 770 struct ceph_fs_client *fsc; 771 int err; 772 773 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 774 if (!fsc) { 775 err = -ENOMEM; 776 goto fail; 777 } 778 779 fsc->client = ceph_create_client(opt, fsc); 780 if (IS_ERR(fsc->client)) { 781 err = PTR_ERR(fsc->client); 782 goto fail; 783 } 784 opt = NULL; /* fsc->client now owns this */ 785 786 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 787 ceph_set_opt(fsc->client, ABORT_ON_FULL); 788 789 if (!fsopt->mds_namespace) { 790 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 791 0, true); 792 } else { 793 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 794 0, false); 795 } 796 797 fsc->mount_options = fsopt; 798 799 fsc->sb = NULL; 800 fsc->mount_state = CEPH_MOUNT_MOUNTING; 801 fsc->filp_gen = 1; 802 fsc->have_copy_from2 = true; 803 804 atomic_long_set(&fsc->writeback_count, 0); 805 fsc->write_congested = false; 806 807 err = -ENOMEM; 808 /* 809 * The number of concurrent works can be high but they don't need 810 * to be processed in parallel, limit concurrency. 811 */ 812 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 813 if (!fsc->inode_wq) 814 goto fail_client; 815 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 816 if (!fsc->cap_wq) 817 goto fail_inode_wq; 818 819 spin_lock(&ceph_fsc_lock); 820 list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 821 spin_unlock(&ceph_fsc_lock); 822 823 return fsc; 824 825 fail_inode_wq: 826 destroy_workqueue(fsc->inode_wq); 827 fail_client: 828 ceph_destroy_client(fsc->client); 829 fail: 830 kfree(fsc); 831 if (opt) 832 ceph_destroy_options(opt); 833 destroy_mount_options(fsopt); 834 return ERR_PTR(err); 835 } 836 837 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 838 { 839 flush_workqueue(fsc->inode_wq); 840 flush_workqueue(fsc->cap_wq); 841 } 842 843 static void destroy_fs_client(struct ceph_fs_client *fsc) 844 { 845 dout("destroy_fs_client %p\n", fsc); 846 847 spin_lock(&ceph_fsc_lock); 848 list_del(&fsc->metric_wakeup); 849 spin_unlock(&ceph_fsc_lock); 850 851 ceph_mdsc_destroy(fsc); 852 destroy_workqueue(fsc->inode_wq); 853 destroy_workqueue(fsc->cap_wq); 854 855 destroy_mount_options(fsc->mount_options); 856 857 ceph_destroy_client(fsc->client); 858 859 kfree(fsc); 860 dout("destroy_fs_client %p done\n", fsc); 861 } 862 863 /* 864 * caches 865 */ 866 struct kmem_cache *ceph_inode_cachep; 867 struct kmem_cache *ceph_cap_cachep; 868 struct kmem_cache *ceph_cap_flush_cachep; 869 struct kmem_cache *ceph_dentry_cachep; 870 struct kmem_cache *ceph_file_cachep; 871 struct kmem_cache *ceph_dir_file_cachep; 872 struct kmem_cache *ceph_mds_request_cachep; 873 mempool_t *ceph_wb_pagevec_pool; 874 875 static void ceph_inode_init_once(void *foo) 876 { 877 struct ceph_inode_info *ci = foo; 878 inode_init_once(&ci->vfs_inode); 879 } 880 881 static int __init init_caches(void) 882 { 883 int error = -ENOMEM; 884 885 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 886 sizeof(struct ceph_inode_info), 887 __alignof__(struct ceph_inode_info), 888 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 889 SLAB_ACCOUNT, ceph_inode_init_once); 890 if (!ceph_inode_cachep) 891 return -ENOMEM; 892 893 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 894 if (!ceph_cap_cachep) 895 goto bad_cap; 896 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 897 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 898 if (!ceph_cap_flush_cachep) 899 goto bad_cap_flush; 900 901 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 902 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 903 if (!ceph_dentry_cachep) 904 goto bad_dentry; 905 906 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 907 if (!ceph_file_cachep) 908 goto bad_file; 909 910 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 911 if (!ceph_dir_file_cachep) 912 goto bad_dir_file; 913 914 ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); 915 if (!ceph_mds_request_cachep) 916 goto bad_mds_req; 917 918 ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); 919 if (!ceph_wb_pagevec_pool) 920 goto bad_pagevec_pool; 921 922 return 0; 923 924 bad_pagevec_pool: 925 kmem_cache_destroy(ceph_mds_request_cachep); 926 bad_mds_req: 927 kmem_cache_destroy(ceph_dir_file_cachep); 928 bad_dir_file: 929 kmem_cache_destroy(ceph_file_cachep); 930 bad_file: 931 kmem_cache_destroy(ceph_dentry_cachep); 932 bad_dentry: 933 kmem_cache_destroy(ceph_cap_flush_cachep); 934 bad_cap_flush: 935 kmem_cache_destroy(ceph_cap_cachep); 936 bad_cap: 937 kmem_cache_destroy(ceph_inode_cachep); 938 return error; 939 } 940 941 static void destroy_caches(void) 942 { 943 /* 944 * Make sure all delayed rcu free inodes are flushed before we 945 * destroy cache. 946 */ 947 rcu_barrier(); 948 949 kmem_cache_destroy(ceph_inode_cachep); 950 kmem_cache_destroy(ceph_cap_cachep); 951 kmem_cache_destroy(ceph_cap_flush_cachep); 952 kmem_cache_destroy(ceph_dentry_cachep); 953 kmem_cache_destroy(ceph_file_cachep); 954 kmem_cache_destroy(ceph_dir_file_cachep); 955 kmem_cache_destroy(ceph_mds_request_cachep); 956 mempool_destroy(ceph_wb_pagevec_pool); 957 } 958 959 static void __ceph_umount_begin(struct ceph_fs_client *fsc) 960 { 961 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 962 ceph_mdsc_force_umount(fsc->mdsc); 963 fsc->filp_gen++; // invalidate open files 964 } 965 966 /* 967 * ceph_umount_begin - initiate forced umount. Tear down the 968 * mount, skipping steps that may hang while waiting for server(s). 969 */ 970 void ceph_umount_begin(struct super_block *sb) 971 { 972 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 973 974 dout("ceph_umount_begin - starting forced umount\n"); 975 if (!fsc) 976 return; 977 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 978 __ceph_umount_begin(fsc); 979 } 980 981 static const struct super_operations ceph_super_ops = { 982 .alloc_inode = ceph_alloc_inode, 983 .free_inode = ceph_free_inode, 984 .write_inode = ceph_write_inode, 985 .drop_inode = generic_delete_inode, 986 .evict_inode = ceph_evict_inode, 987 .sync_fs = ceph_sync_fs, 988 .put_super = ceph_put_super, 989 .show_options = ceph_show_options, 990 .statfs = ceph_statfs, 991 .umount_begin = ceph_umount_begin, 992 }; 993 994 /* 995 * Bootstrap mount by opening the root directory. Note the mount 996 * @started time from caller, and time out if this takes too long. 997 */ 998 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 999 const char *path, 1000 unsigned long started) 1001 { 1002 struct ceph_mds_client *mdsc = fsc->mdsc; 1003 struct ceph_mds_request *req = NULL; 1004 int err; 1005 struct dentry *root; 1006 1007 /* open dir */ 1008 dout("open_root_inode opening '%s'\n", path); 1009 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1010 if (IS_ERR(req)) 1011 return ERR_CAST(req); 1012 req->r_path1 = kstrdup(path, GFP_NOFS); 1013 if (!req->r_path1) { 1014 root = ERR_PTR(-ENOMEM); 1015 goto out; 1016 } 1017 1018 req->r_ino1.ino = CEPH_INO_ROOT; 1019 req->r_ino1.snap = CEPH_NOSNAP; 1020 req->r_started = started; 1021 req->r_timeout = fsc->client->options->mount_timeout; 1022 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1023 req->r_num_caps = 2; 1024 err = ceph_mdsc_do_request(mdsc, NULL, req); 1025 if (err == 0) { 1026 struct inode *inode = req->r_target_inode; 1027 req->r_target_inode = NULL; 1028 dout("open_root_inode success\n"); 1029 root = d_make_root(inode); 1030 if (!root) { 1031 root = ERR_PTR(-ENOMEM); 1032 goto out; 1033 } 1034 dout("open_root_inode success, root dentry is %p\n", root); 1035 } else { 1036 root = ERR_PTR(err); 1037 } 1038 out: 1039 ceph_mdsc_put_request(req); 1040 return root; 1041 } 1042 1043 /* 1044 * mount: join the ceph cluster, and open root directory. 1045 */ 1046 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1047 struct fs_context *fc) 1048 { 1049 int err; 1050 unsigned long started = jiffies; /* note the start time */ 1051 struct dentry *root; 1052 1053 dout("mount start %p\n", fsc); 1054 mutex_lock(&fsc->client->mount_mutex); 1055 1056 if (!fsc->sb->s_root) { 1057 const char *path = fsc->mount_options->server_path ? 1058 fsc->mount_options->server_path + 1 : ""; 1059 1060 err = __ceph_open_session(fsc->client, started); 1061 if (err < 0) 1062 goto out; 1063 1064 /* setup fscache */ 1065 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1066 err = ceph_fscache_register_fs(fsc, fc); 1067 if (err < 0) 1068 goto out; 1069 } 1070 1071 dout("mount opening path '%s'\n", path); 1072 1073 ceph_fs_debugfs_init(fsc); 1074 1075 root = open_root_dentry(fsc, path, started); 1076 if (IS_ERR(root)) { 1077 err = PTR_ERR(root); 1078 goto out; 1079 } 1080 fsc->sb->s_root = dget(root); 1081 } else { 1082 root = dget(fsc->sb->s_root); 1083 } 1084 1085 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1086 dout("mount success\n"); 1087 mutex_unlock(&fsc->client->mount_mutex); 1088 return root; 1089 1090 out: 1091 mutex_unlock(&fsc->client->mount_mutex); 1092 return ERR_PTR(err); 1093 } 1094 1095 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1096 { 1097 struct ceph_fs_client *fsc = s->s_fs_info; 1098 int ret; 1099 1100 dout("set_super %p\n", s); 1101 1102 s->s_maxbytes = MAX_LFS_FILESIZE; 1103 1104 s->s_xattr = ceph_xattr_handlers; 1105 fsc->sb = s; 1106 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1107 1108 s->s_op = &ceph_super_ops; 1109 s->s_d_op = &ceph_dentry_ops; 1110 s->s_export_op = &ceph_export_ops; 1111 1112 s->s_time_gran = 1; 1113 s->s_time_min = 0; 1114 s->s_time_max = U32_MAX; 1115 1116 ret = set_anon_super_fc(s, fc); 1117 if (ret != 0) 1118 fsc->sb = NULL; 1119 return ret; 1120 } 1121 1122 /* 1123 * share superblock if same fs AND options 1124 */ 1125 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1126 { 1127 struct ceph_fs_client *new = fc->s_fs_info; 1128 struct ceph_mount_options *fsopt = new->mount_options; 1129 struct ceph_options *opt = new->client->options; 1130 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1131 1132 dout("ceph_compare_super %p\n", sb); 1133 1134 if (compare_mount_options(fsopt, opt, fsc)) { 1135 dout("monitor(s)/mount options don't match\n"); 1136 return 0; 1137 } 1138 if ((opt->flags & CEPH_OPT_FSID) && 1139 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1140 dout("fsid doesn't match\n"); 1141 return 0; 1142 } 1143 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1144 dout("flags differ\n"); 1145 return 0; 1146 } 1147 1148 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1149 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1150 return 0; 1151 } 1152 1153 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1154 dout("client has been forcibly unmounted\n"); 1155 return 0; 1156 } 1157 1158 return 1; 1159 } 1160 1161 /* 1162 * construct our own bdi so we can control readahead, etc. 1163 */ 1164 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1165 1166 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1167 { 1168 int err; 1169 1170 err = super_setup_bdi_name(sb, "ceph-%ld", 1171 atomic_long_inc_return(&bdi_seq)); 1172 if (err) 1173 return err; 1174 1175 /* set ra_pages based on rasize mount option? */ 1176 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1177 1178 /* set io_pages based on max osd read size */ 1179 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1180 1181 return 0; 1182 } 1183 1184 static int ceph_get_tree(struct fs_context *fc) 1185 { 1186 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1187 struct ceph_mount_options *fsopt = pctx->opts; 1188 struct super_block *sb; 1189 struct ceph_fs_client *fsc; 1190 struct dentry *res; 1191 int (*compare_super)(struct super_block *, struct fs_context *) = 1192 ceph_compare_super; 1193 int err; 1194 1195 dout("ceph_get_tree\n"); 1196 1197 if (!fc->source) 1198 return invalfc(fc, "No source"); 1199 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1200 return invalfc(fc, "No monitor address"); 1201 1202 /* create client (which we may/may not use) */ 1203 fsc = create_fs_client(pctx->opts, pctx->copts); 1204 pctx->opts = NULL; 1205 pctx->copts = NULL; 1206 if (IS_ERR(fsc)) { 1207 err = PTR_ERR(fsc); 1208 goto out_final; 1209 } 1210 1211 err = ceph_mdsc_init(fsc); 1212 if (err < 0) 1213 goto out; 1214 1215 if (ceph_test_opt(fsc->client, NOSHARE)) 1216 compare_super = NULL; 1217 1218 fc->s_fs_info = fsc; 1219 sb = sget_fc(fc, compare_super, ceph_set_super); 1220 fc->s_fs_info = NULL; 1221 if (IS_ERR(sb)) { 1222 err = PTR_ERR(sb); 1223 goto out; 1224 } 1225 1226 if (ceph_sb_to_client(sb) != fsc) { 1227 destroy_fs_client(fsc); 1228 fsc = ceph_sb_to_client(sb); 1229 dout("get_sb got existing client %p\n", fsc); 1230 } else { 1231 dout("get_sb using new client %p\n", fsc); 1232 err = ceph_setup_bdi(sb, fsc); 1233 if (err < 0) 1234 goto out_splat; 1235 } 1236 1237 res = ceph_real_mount(fsc, fc); 1238 if (IS_ERR(res)) { 1239 err = PTR_ERR(res); 1240 goto out_splat; 1241 } 1242 dout("root %p inode %p ino %llx.%llx\n", res, 1243 d_inode(res), ceph_vinop(d_inode(res))); 1244 fc->root = fsc->sb->s_root; 1245 return 0; 1246 1247 out_splat: 1248 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1249 pr_info("No mds server is up or the cluster is laggy\n"); 1250 err = -EHOSTUNREACH; 1251 } 1252 1253 ceph_mdsc_close_sessions(fsc->mdsc); 1254 deactivate_locked_super(sb); 1255 goto out_final; 1256 1257 out: 1258 destroy_fs_client(fsc); 1259 out_final: 1260 dout("ceph_get_tree fail %d\n", err); 1261 return err; 1262 } 1263 1264 static void ceph_free_fc(struct fs_context *fc) 1265 { 1266 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1267 1268 if (pctx) { 1269 destroy_mount_options(pctx->opts); 1270 ceph_destroy_options(pctx->copts); 1271 kfree(pctx); 1272 } 1273 } 1274 1275 static int ceph_reconfigure_fc(struct fs_context *fc) 1276 { 1277 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1278 struct ceph_mount_options *fsopt = pctx->opts; 1279 struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); 1280 1281 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1282 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1283 else 1284 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1285 1286 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1287 kfree(fsc->mount_options->mon_addr); 1288 fsc->mount_options->mon_addr = fsopt->mon_addr; 1289 fsopt->mon_addr = NULL; 1290 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1291 } 1292 1293 sync_filesystem(fc->root->d_sb); 1294 return 0; 1295 } 1296 1297 static const struct fs_context_operations ceph_context_ops = { 1298 .free = ceph_free_fc, 1299 .parse_param = ceph_parse_mount_param, 1300 .get_tree = ceph_get_tree, 1301 .reconfigure = ceph_reconfigure_fc, 1302 }; 1303 1304 /* 1305 * Set up the filesystem mount context. 1306 */ 1307 static int ceph_init_fs_context(struct fs_context *fc) 1308 { 1309 struct ceph_parse_opts_ctx *pctx; 1310 struct ceph_mount_options *fsopt; 1311 1312 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1313 if (!pctx) 1314 return -ENOMEM; 1315 1316 pctx->copts = ceph_alloc_options(); 1317 if (!pctx->copts) 1318 goto nomem; 1319 1320 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1321 if (!pctx->opts) 1322 goto nomem; 1323 1324 fsopt = pctx->opts; 1325 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1326 1327 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1328 fsopt->rsize = CEPH_MAX_READ_SIZE; 1329 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1330 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1331 if (!fsopt->snapdir_name) 1332 goto nomem; 1333 1334 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1335 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1336 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1337 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1338 fsopt->congestion_kb = default_congestion_kb(); 1339 1340 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1341 fc->sb_flags |= SB_POSIXACL; 1342 #endif 1343 1344 fc->fs_private = pctx; 1345 fc->ops = &ceph_context_ops; 1346 return 0; 1347 1348 nomem: 1349 destroy_mount_options(pctx->opts); 1350 ceph_destroy_options(pctx->copts); 1351 kfree(pctx); 1352 return -ENOMEM; 1353 } 1354 1355 static void ceph_kill_sb(struct super_block *s) 1356 { 1357 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1358 1359 dout("kill_sb %p\n", s); 1360 1361 ceph_mdsc_pre_umount(fsc->mdsc); 1362 flush_fs_workqueues(fsc); 1363 1364 kill_anon_super(s); 1365 1366 fsc->client->extra_mon_dispatch = NULL; 1367 ceph_fs_debugfs_cleanup(fsc); 1368 1369 ceph_fscache_unregister_fs(fsc); 1370 1371 destroy_fs_client(fsc); 1372 } 1373 1374 static struct file_system_type ceph_fs_type = { 1375 .owner = THIS_MODULE, 1376 .name = "ceph", 1377 .init_fs_context = ceph_init_fs_context, 1378 .kill_sb = ceph_kill_sb, 1379 .fs_flags = FS_RENAME_DOES_D_MOVE, 1380 }; 1381 MODULE_ALIAS_FS("ceph"); 1382 1383 int ceph_force_reconnect(struct super_block *sb) 1384 { 1385 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1386 int err = 0; 1387 1388 fsc->mount_state = CEPH_MOUNT_RECOVER; 1389 __ceph_umount_begin(fsc); 1390 1391 /* Make sure all page caches get invalidated. 1392 * see remove_session_caps_cb() */ 1393 flush_workqueue(fsc->inode_wq); 1394 1395 /* In case that we were blocklisted. This also reset 1396 * all mon/osd connections */ 1397 ceph_reset_client_addr(fsc->client); 1398 1399 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1400 1401 fsc->blocklisted = false; 1402 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1403 1404 if (sb->s_root) { 1405 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1406 CEPH_STAT_CAP_INODE, true); 1407 } 1408 return err; 1409 } 1410 1411 static int __init init_ceph(void) 1412 { 1413 int ret = init_caches(); 1414 if (ret) 1415 goto out; 1416 1417 ceph_flock_init(); 1418 ret = register_filesystem(&ceph_fs_type); 1419 if (ret) 1420 goto out_caches; 1421 1422 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1423 1424 return 0; 1425 1426 out_caches: 1427 destroy_caches(); 1428 out: 1429 return ret; 1430 } 1431 1432 static void __exit exit_ceph(void) 1433 { 1434 dout("exit_ceph\n"); 1435 unregister_filesystem(&ceph_fs_type); 1436 destroy_caches(); 1437 } 1438 1439 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1440 { 1441 struct ceph_fs_client *fsc; 1442 int ret; 1443 1444 ret = param_set_bool(val, kp); 1445 if (ret) { 1446 pr_err("Failed to parse sending metrics switch value '%s'\n", 1447 val); 1448 return ret; 1449 } else if (!disable_send_metrics) { 1450 // wake up all the mds clients 1451 spin_lock(&ceph_fsc_lock); 1452 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1453 metric_schedule_delayed(&fsc->mdsc->metric); 1454 } 1455 spin_unlock(&ceph_fsc_lock); 1456 } 1457 1458 return 0; 1459 } 1460 1461 static const struct kernel_param_ops param_ops_metrics = { 1462 .set = param_set_metrics, 1463 .get = param_get_bool, 1464 }; 1465 1466 bool disable_send_metrics = false; 1467 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1468 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1469 1470 /* for both v1 and v2 syntax */ 1471 static bool mount_support = true; 1472 static const struct kernel_param_ops param_ops_mount_syntax = { 1473 .get = param_get_bool, 1474 }; 1475 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1476 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1477 1478 module_init(init_ceph); 1479 module_exit(exit_ceph); 1480 1481 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1482 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1483 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1484 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1485 MODULE_LICENSE("GPL"); 1486