1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 #include <uapi/linux/magic.h> 31 32 static DEFINE_SPINLOCK(ceph_fsc_lock); 33 static LIST_HEAD(ceph_fsc_list); 34 35 /* 36 * Ceph superblock operations 37 * 38 * Handle the basics of mounting, unmounting. 39 */ 40 41 /* 42 * super ops 43 */ 44 static void ceph_put_super(struct super_block *s) 45 { 46 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 47 48 dout("put_super\n"); 49 ceph_mdsc_close_sessions(fsc->mdsc); 50 } 51 52 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 53 { 54 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 55 struct ceph_mon_client *monc = &fsc->client->monc; 56 struct ceph_statfs st; 57 int i, err; 58 u64 data_pool; 59 60 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 61 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 62 } else { 63 data_pool = CEPH_NOPOOL; 64 } 65 66 dout("statfs\n"); 67 err = ceph_monc_do_statfs(monc, data_pool, &st); 68 if (err < 0) 69 return err; 70 71 /* fill in kstatfs */ 72 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 73 74 /* 75 * express utilization in terms of large blocks to avoid 76 * overflow on 32-bit machines. 77 * 78 * NOTE: for the time being, we make bsize == frsize to humor 79 * not-yet-ancient versions of glibc that are broken. 
80 * Someday, we will probably want to report a real block 81 * size... whatever that may mean for a network file system! 82 */ 83 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 84 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 85 86 /* 87 * By default use root quota for stats; fallback to overall filesystem 88 * usage if using 'noquotadf' mount option or if the root dir doesn't 89 * have max_bytes quota set. 90 */ 91 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 92 !ceph_quota_update_statfs(fsc, buf)) { 93 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 94 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 95 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 96 } 97 98 buf->f_files = le64_to_cpu(st.num_objects); 99 buf->f_ffree = -1; 100 buf->f_namelen = NAME_MAX; 101 102 /* Must convert the fsid, for consistent values across arches */ 103 buf->f_fsid.val[0] = 0; 104 mutex_lock(&monc->mutex); 105 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 106 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 107 mutex_unlock(&monc->mutex); 108 109 /* fold the fs_cluster_id into the upper bits */ 110 buf->f_fsid.val[1] = monc->fs_cluster_id; 111 112 return 0; 113 } 114 115 static int ceph_sync_fs(struct super_block *sb, int wait) 116 { 117 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 118 119 if (!wait) { 120 dout("sync_fs (non-blocking)\n"); 121 ceph_flush_dirty_caps(fsc->mdsc); 122 dout("sync_fs (non-blocking) done\n"); 123 return 0; 124 } 125 126 dout("sync_fs (blocking)\n"); 127 ceph_osdc_sync(&fsc->client->osdc); 128 ceph_mdsc_sync(fsc->mdsc); 129 dout("sync_fs (blocking) done\n"); 130 return 0; 131 } 132 133 /* 134 * mount options 135 */ 136 enum { 137 Opt_wsize, 138 Opt_rsize, 139 Opt_rasize, 140 Opt_caps_wanted_delay_min, 141 Opt_caps_wanted_delay_max, 142 Opt_caps_max, 143 Opt_readdir_max_entries, 144 Opt_readdir_max_bytes, 145 Opt_congestion_kb, 146 /* int args above */ 147 Opt_snapdirname, 148 
Opt_mds_namespace, 149 Opt_recover_session, 150 Opt_source, 151 Opt_mon_addr, 152 /* string args above */ 153 Opt_dirstat, 154 Opt_rbytes, 155 Opt_asyncreaddir, 156 Opt_dcache, 157 Opt_ino32, 158 Opt_fscache, 159 Opt_poolperm, 160 Opt_require_active_mds, 161 Opt_acl, 162 Opt_quotadf, 163 Opt_copyfrom, 164 Opt_wsync, 165 Opt_pagecache, 166 }; 167 168 enum ceph_recover_session_mode { 169 ceph_recover_session_no, 170 ceph_recover_session_clean 171 }; 172 173 static const struct constant_table ceph_param_recover[] = { 174 { "no", ceph_recover_session_no }, 175 { "clean", ceph_recover_session_clean }, 176 {} 177 }; 178 179 static const struct fs_parameter_spec ceph_mount_parameters[] = { 180 fsparam_flag_no ("acl", Opt_acl), 181 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 182 fsparam_s32 ("caps_max", Opt_caps_max), 183 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 184 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 185 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 186 fsparam_flag_no ("copyfrom", Opt_copyfrom), 187 fsparam_flag_no ("dcache", Opt_dcache), 188 fsparam_flag_no ("dirstat", Opt_dirstat), 189 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 190 fsparam_string ("fsc", Opt_fscache), // fsc=... 
191 fsparam_flag_no ("ino32", Opt_ino32), 192 fsparam_string ("mds_namespace", Opt_mds_namespace), 193 fsparam_flag_no ("poolperm", Opt_poolperm), 194 fsparam_flag_no ("quotadf", Opt_quotadf), 195 fsparam_u32 ("rasize", Opt_rasize), 196 fsparam_flag_no ("rbytes", Opt_rbytes), 197 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 198 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 199 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 200 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 201 fsparam_u32 ("rsize", Opt_rsize), 202 fsparam_string ("snapdirname", Opt_snapdirname), 203 fsparam_string ("source", Opt_source), 204 fsparam_string ("mon_addr", Opt_mon_addr), 205 fsparam_u32 ("wsize", Opt_wsize), 206 fsparam_flag_no ("wsync", Opt_wsync), 207 fsparam_flag_no ("pagecache", Opt_pagecache), 208 {} 209 }; 210 211 struct ceph_parse_opts_ctx { 212 struct ceph_options *copts; 213 struct ceph_mount_options *opts; 214 }; 215 216 /* 217 * Remove adjacent slashes and then the trailing slash, unless it is 218 * the only remaining character. 219 * 220 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 221 */ 222 static void canonicalize_path(char *path) 223 { 224 int i, j = 0; 225 226 for (i = 0; path[i] != '\0'; i++) { 227 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 228 path[j++] = path[i]; 229 } 230 231 if (j > 1 && path[j - 1] == '/') 232 j--; 233 path[j] = '\0'; 234 } 235 236 /* 237 * Check if the mds namespace in ceph_mount_options matches 238 * the passed in namespace string. First time match (when 239 * ->mds_namespace is NULL) is treated specially, since 240 * ->mds_namespace needs to be initialized by the caller. 
241 */ 242 static int namespace_equals(struct ceph_mount_options *fsopt, 243 const char *namespace, size_t len) 244 { 245 return !(fsopt->mds_namespace && 246 (strlen(fsopt->mds_namespace) != len || 247 strncmp(fsopt->mds_namespace, namespace, len))); 248 } 249 250 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 251 struct fs_context *fc) 252 { 253 int r; 254 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 255 struct ceph_mount_options *fsopt = pctx->opts; 256 257 if (*dev_name_end != ':') 258 return invalfc(fc, "separator ':' missing in source"); 259 260 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 261 pctx->copts, fc->log.log, ','); 262 if (r) 263 return r; 264 265 fsopt->new_dev_syntax = false; 266 return 0; 267 } 268 269 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 270 struct fs_context *fc) 271 { 272 size_t len; 273 struct ceph_fsid fsid; 274 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 275 struct ceph_mount_options *fsopt = pctx->opts; 276 char *fsid_start, *fs_name_start; 277 278 if (*dev_name_end != '=') { 279 dout("separator '=' missing in source"); 280 return -EINVAL; 281 } 282 283 fsid_start = strchr(dev_name, '@'); 284 if (!fsid_start) 285 return invalfc(fc, "missing cluster fsid"); 286 ++fsid_start; /* start of cluster fsid */ 287 288 fs_name_start = strchr(fsid_start, '.'); 289 if (!fs_name_start) 290 return invalfc(fc, "missing file system name"); 291 292 if (ceph_parse_fsid(fsid_start, &fsid)) 293 return invalfc(fc, "Invalid FSID"); 294 295 ++fs_name_start; /* start of file system name */ 296 len = dev_name_end - fs_name_start; 297 298 if (!namespace_equals(fsopt, fs_name_start, len)) 299 return invalfc(fc, "Mismatching mds_namespace"); 300 kfree(fsopt->mds_namespace); 301 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 302 if (!fsopt->mds_namespace) 303 return -ENOMEM; 304 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 305 
306 fsopt->new_dev_syntax = true; 307 return 0; 308 } 309 310 /* 311 * Parse the source parameter for new device format. Distinguish the device 312 * spec from the path. Try parsing new device format and fallback to old 313 * format if needed. 314 * 315 * New device syntax will looks like: 316 * <device_spec>=/<path> 317 * where 318 * <device_spec> is name@fsid.fsname 319 * <path> is optional, but if present must begin with '/' 320 * (monitor addresses are passed via mount option) 321 * 322 * Old device syntax is: 323 * <server_spec>[,<server_spec>...]:[<path>] 324 * where 325 * <server_spec> is <ip>[:<port>] 326 * <path> is optional, but if present must begin with '/' 327 */ 328 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 329 { 330 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 331 struct ceph_mount_options *fsopt = pctx->opts; 332 char *dev_name = param->string, *dev_name_end; 333 int ret; 334 335 dout("%s '%s'\n", __func__, dev_name); 336 if (!dev_name || !*dev_name) 337 return invalfc(fc, "Empty source"); 338 339 dev_name_end = strchr(dev_name, '/'); 340 if (dev_name_end) { 341 /* 342 * The server_path will include the whole chars from userland 343 * including the leading '/'. 
344 */ 345 kfree(fsopt->server_path); 346 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 347 if (!fsopt->server_path) 348 return -ENOMEM; 349 350 canonicalize_path(fsopt->server_path); 351 } else { 352 dev_name_end = dev_name + strlen(dev_name); 353 } 354 355 dev_name_end--; /* back up to separator */ 356 if (dev_name_end < dev_name) 357 return invalfc(fc, "Path missing in source"); 358 359 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 360 if (fsopt->server_path) 361 dout("server path '%s'\n", fsopt->server_path); 362 363 dout("trying new device syntax"); 364 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 365 if (ret) { 366 if (ret != -EINVAL) 367 return ret; 368 dout("trying old device syntax"); 369 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 370 if (ret) 371 return ret; 372 } 373 374 fc->source = param->string; 375 param->string = NULL; 376 return 0; 377 } 378 379 static int ceph_parse_mon_addr(struct fs_parameter *param, 380 struct fs_context *fc) 381 { 382 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 383 struct ceph_mount_options *fsopt = pctx->opts; 384 385 kfree(fsopt->mon_addr); 386 fsopt->mon_addr = param->string; 387 param->string = NULL; 388 389 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 390 pctx->copts, fc->log.log, '/'); 391 } 392 393 static int ceph_parse_mount_param(struct fs_context *fc, 394 struct fs_parameter *param) 395 { 396 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 397 struct ceph_mount_options *fsopt = pctx->opts; 398 struct fs_parse_result result; 399 unsigned int mode; 400 int token, ret; 401 402 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 403 if (ret != -ENOPARAM) 404 return ret; 405 406 token = fs_parse(fc, ceph_mount_parameters, param, &result); 407 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 408 if (token < 0) 409 return token; 410 411 switch (token) { 412 case Opt_snapdirname: 413 
kfree(fsopt->snapdir_name); 414 fsopt->snapdir_name = param->string; 415 param->string = NULL; 416 break; 417 case Opt_mds_namespace: 418 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 419 return invalfc(fc, "Mismatching mds_namespace"); 420 kfree(fsopt->mds_namespace); 421 fsopt->mds_namespace = param->string; 422 param->string = NULL; 423 break; 424 case Opt_recover_session: 425 mode = result.uint_32; 426 if (mode == ceph_recover_session_no) 427 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 428 else if (mode == ceph_recover_session_clean) 429 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 430 else 431 BUG(); 432 break; 433 case Opt_source: 434 if (fc->source) 435 return invalfc(fc, "Multiple sources specified"); 436 return ceph_parse_source(param, fc); 437 case Opt_mon_addr: 438 return ceph_parse_mon_addr(param, fc); 439 case Opt_wsize: 440 if (result.uint_32 < PAGE_SIZE || 441 result.uint_32 > CEPH_MAX_WRITE_SIZE) 442 goto out_of_range; 443 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 444 break; 445 case Opt_rsize: 446 if (result.uint_32 < PAGE_SIZE || 447 result.uint_32 > CEPH_MAX_READ_SIZE) 448 goto out_of_range; 449 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 450 break; 451 case Opt_rasize: 452 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_caps_wanted_delay_min: 455 if (result.uint_32 < 1) 456 goto out_of_range; 457 fsopt->caps_wanted_delay_min = result.uint_32; 458 break; 459 case Opt_caps_wanted_delay_max: 460 if (result.uint_32 < 1) 461 goto out_of_range; 462 fsopt->caps_wanted_delay_max = result.uint_32; 463 break; 464 case Opt_caps_max: 465 if (result.int_32 < 0) 466 goto out_of_range; 467 fsopt->caps_max = result.int_32; 468 break; 469 case Opt_readdir_max_entries: 470 if (result.uint_32 < 1) 471 goto out_of_range; 472 fsopt->max_readdir = result.uint_32; 473 break; 474 case Opt_readdir_max_bytes: 475 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 476 goto out_of_range; 477 
fsopt->max_readdir_bytes = result.uint_32; 478 break; 479 case Opt_congestion_kb: 480 if (result.uint_32 < 1024) /* at least 1M */ 481 goto out_of_range; 482 fsopt->congestion_kb = result.uint_32; 483 break; 484 case Opt_dirstat: 485 if (!result.negated) 486 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 487 else 488 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 489 break; 490 case Opt_rbytes: 491 if (!result.negated) 492 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 493 else 494 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 495 break; 496 case Opt_asyncreaddir: 497 if (!result.negated) 498 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 499 else 500 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 501 break; 502 case Opt_dcache: 503 if (!result.negated) 504 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 505 else 506 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 507 break; 508 case Opt_ino32: 509 if (!result.negated) 510 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 511 else 512 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 513 break; 514 515 case Opt_fscache: 516 #ifdef CONFIG_CEPH_FSCACHE 517 kfree(fsopt->fscache_uniq); 518 fsopt->fscache_uniq = NULL; 519 if (result.negated) { 520 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 521 } else { 522 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 523 fsopt->fscache_uniq = param->string; 524 param->string = NULL; 525 } 526 break; 527 #else 528 return invalfc(fc, "fscache support is disabled"); 529 #endif 530 case Opt_poolperm: 531 if (!result.negated) 532 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 533 else 534 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 535 break; 536 case Opt_require_active_mds: 537 if (!result.negated) 538 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 539 else 540 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 541 break; 542 case Opt_quotadf: 543 if (!result.negated) 544 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 545 else 546 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 547 break; 548 case Opt_copyfrom: 549 if (!result.negated) 550 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 551 
else 552 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 553 break; 554 case Opt_acl: 555 if (!result.negated) { 556 #ifdef CONFIG_CEPH_FS_POSIX_ACL 557 fc->sb_flags |= SB_POSIXACL; 558 #else 559 return invalfc(fc, "POSIX ACL support is disabled"); 560 #endif 561 } else { 562 fc->sb_flags &= ~SB_POSIXACL; 563 } 564 break; 565 case Opt_wsync: 566 if (!result.negated) 567 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 568 else 569 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 570 break; 571 case Opt_pagecache: 572 if (result.negated) 573 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 574 else 575 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 576 break; 577 default: 578 BUG(); 579 } 580 return 0; 581 582 out_of_range: 583 return invalfc(fc, "%s out of range", param->key); 584 } 585 586 static void destroy_mount_options(struct ceph_mount_options *args) 587 { 588 dout("destroy_mount_options %p\n", args); 589 if (!args) 590 return; 591 592 kfree(args->snapdir_name); 593 kfree(args->mds_namespace); 594 kfree(args->server_path); 595 kfree(args->fscache_uniq); 596 kfree(args->mon_addr); 597 kfree(args); 598 } 599 600 static int strcmp_null(const char *s1, const char *s2) 601 { 602 if (!s1 && !s2) 603 return 0; 604 if (s1 && !s2) 605 return -1; 606 if (!s1 && s2) 607 return 1; 608 return strcmp(s1, s2); 609 } 610 611 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 612 struct ceph_options *new_opt, 613 struct ceph_fs_client *fsc) 614 { 615 struct ceph_mount_options *fsopt1 = new_fsopt; 616 struct ceph_mount_options *fsopt2 = fsc->mount_options; 617 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 618 int ret; 619 620 ret = memcmp(fsopt1, fsopt2, ofs); 621 if (ret) 622 return ret; 623 624 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 625 if (ret) 626 return ret; 627 628 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 629 if (ret) 630 return ret; 631 632 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 633 if 
(ret) 634 return ret; 635 636 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 637 if (ret) 638 return ret; 639 640 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 641 if (ret) 642 return ret; 643 644 return ceph_compare_options(new_opt, fsc->client); 645 } 646 647 /** 648 * ceph_show_options - Show mount options in /proc/mounts 649 * @m: seq_file to write to 650 * @root: root of that (sub)tree 651 */ 652 static int ceph_show_options(struct seq_file *m, struct dentry *root) 653 { 654 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 655 struct ceph_mount_options *fsopt = fsc->mount_options; 656 size_t pos; 657 int ret; 658 659 /* a comma between MNT/MS and client options */ 660 seq_putc(m, ','); 661 pos = m->count; 662 663 ret = ceph_print_client_options(m, fsc->client, false); 664 if (ret) 665 return ret; 666 667 /* retract our comma if no client options */ 668 if (m->count == pos) 669 m->count--; 670 671 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 672 seq_puts(m, ",dirstat"); 673 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 674 seq_puts(m, ",rbytes"); 675 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 676 seq_puts(m, ",noasyncreaddir"); 677 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 678 seq_puts(m, ",nodcache"); 679 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 680 seq_puts(m, ",ino32"); 681 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 682 seq_show_option(m, "fsc", fsopt->fscache_uniq); 683 } 684 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 685 seq_puts(m, ",nopoolperm"); 686 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 687 seq_puts(m, ",noquotadf"); 688 689 #ifdef CONFIG_CEPH_FS_POSIX_ACL 690 if (root->d_sb->s_flags & SB_POSIXACL) 691 seq_puts(m, ",acl"); 692 else 693 seq_puts(m, ",noacl"); 694 #endif 695 696 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 697 seq_puts(m, ",copyfrom"); 698 699 /* dump mds_namespace when old device syntax is in use */ 700 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 701 
seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 702 703 if (fsopt->mon_addr) 704 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 705 706 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 707 seq_show_option(m, "recover_session", "clean"); 708 709 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 710 seq_puts(m, ",wsync"); 711 712 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 713 seq_puts(m, ",nopagecache"); 714 715 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 716 seq_printf(m, ",wsize=%u", fsopt->wsize); 717 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 718 seq_printf(m, ",rsize=%u", fsopt->rsize); 719 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 720 seq_printf(m, ",rasize=%u", fsopt->rasize); 721 if (fsopt->congestion_kb != default_congestion_kb()) 722 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 723 if (fsopt->caps_max) 724 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 725 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 726 seq_printf(m, ",caps_wanted_delay_min=%u", 727 fsopt->caps_wanted_delay_min); 728 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 729 seq_printf(m, ",caps_wanted_delay_max=%u", 730 fsopt->caps_wanted_delay_max); 731 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 732 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 733 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 734 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 735 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 736 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 737 738 return 0; 739 } 740 741 /* 742 * handle any mon messages the standard library doesn't understand. 743 * return error if we don't either. 
 */
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
	struct ceph_fs_client *fsc = client->private;
	int type = le16_to_cpu(msg->hdr.type);

	/* MDS map and fs map messages go to the mds client; all else is -1 */
	switch (type) {
	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
		return 0;
	case CEPH_MSG_FS_MAP_USER:
		ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
		return 0;
	default:
		return -1;
	}
}

/*
 * create a new fs client
 *
 * Success or not, this function consumes @fsopt and @opt.
 *
 * On success @fsopt becomes fsc->mount_options and the new fs client is
 * linked onto ceph_fsc_list.  On failure both option structures are
 * destroyed here and an ERR_PTR() is returned.
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
					struct ceph_options *opt)
{
	struct ceph_fs_client *fsc;
	int err;

	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
	if (!fsc) {
		err = -ENOMEM;
		goto fail;
	}

	fsc->client = ceph_create_client(opt, fsc);
	if (IS_ERR(fsc->client)) {
		err = PTR_ERR(fsc->client);
		goto fail;
	}
	opt = NULL; /* fsc->client now owns this */

	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
	ceph_set_opt(fsc->client, ABORT_ON_FULL);

	/* without a named file system ask for the mdsmap, otherwise the fsmap */
	if (!fsopt->mds_namespace) {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				   0, true);
	} else {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				   0, false);
	}

	fsc->mount_options = fsopt;

	fsc->sb = NULL;
	fsc->mount_state = CEPH_MOUNT_MOUNTING;
	fsc->filp_gen = 1;
	fsc->have_copy_from2 = true;

	atomic_long_set(&fsc->writeback_count, 0);

	/* both workqueue allocations below fail with -ENOMEM */
	err = -ENOMEM;
	/*
	 * The number of concurrent works can be high but they don't need
	 * to be processed in parallel, limit concurrency.
	 */
	fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
	if (!fsc->inode_wq)
		goto fail_client;
	fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
	if (!fsc->cap_wq)
		goto fail_inode_wq;

	spin_lock(&ceph_fsc_lock);
	list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
	spin_unlock(&ceph_fsc_lock);

	return fsc;

	/* unwind in reverse order of acquisition */
fail_inode_wq:
	destroy_workqueue(fsc->inode_wq);
fail_client:
	ceph_destroy_client(fsc->client);
fail:
	kfree(fsc);
	/* opt is still ours only if ceph_create_client() did not take it */
	if (opt)
		ceph_destroy_options(opt);
	destroy_mount_options(fsopt);
	return ERR_PTR(err);
}

/* Flush both per-client workqueues (inode work first, then cap work). */
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
	flush_workqueue(fsc->inode_wq);
	flush_workqueue(fsc->cap_wq);
}

/*
 * Tear down an fs client: unlink it from ceph_fsc_list, destroy the mds
 * client and the workqueues, free the mount options, destroy the
 * libceph client and finally free @fsc itself.
 */
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
	dout("destroy_fs_client %p\n", fsc);

	spin_lock(&ceph_fsc_lock);
	list_del(&fsc->metric_wakeup);
	spin_unlock(&ceph_fsc_lock);

	ceph_mdsc_destroy(fsc);
	destroy_workqueue(fsc->inode_wq);
	destroy_workqueue(fsc->cap_wq);

	destroy_mount_options(fsc->mount_options);

	ceph_destroy_client(fsc->client);

	kfree(fsc);
	/* only the pointer value is printed here; fsc is already freed */
	dout("destroy_fs_client %p done\n", fsc);
}

/*
 * caches
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep;
struct kmem_cache *ceph_mds_request_cachep;
mempool_t *ceph_wb_pagevec_pool;

/* slab constructor for ceph_inode_cachep: initialise the embedded VFS inode */
static void ceph_inode_init_once(void *foo)
{
	struct ceph_inode_info *ci = foo;
	inode_init_once(&ci->vfs_inode);
}

/*
 * Create the slab caches and the writeback pagevec mempool declared
 * above.
 *
 * Returns 0 on success.  On failure everything created so far is
 * destroyed again and -ENOMEM is returned.
 */
static int __init init_caches(void)
{
	int error = -ENOMEM;

	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
				      SLAB_ACCOUNT, ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto bad_cap;
	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
					   SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_flush_cachep)
		goto bad_cap_flush;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto bad_file;

	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
	if (!ceph_dir_file_cachep)
		goto bad_dir_file;

	ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
	if (!ceph_mds_request_cachep)
		goto bad_mds_req;

	ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT);
	if (!ceph_wb_pagevec_pool)
		goto bad_pagevec_pool;

	return 0;

	/* each label destroys the cache created just before the failing step */
bad_pagevec_pool:
	kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
	kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
	kmem_cache_destroy(ceph_file_cachep);
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return error;
}

/* Destroy every cache and the mempool created by init_caches(). */
static void destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(ceph_inode_cachep);
	kmem_cache_destroy(ceph_cap_cachep);
	kmem_cache_destroy(ceph_cap_flush_cachep);
	kmem_cache_destroy(ceph_dentry_cachep);
	kmem_cache_destroy(ceph_file_cachep);
	kmem_cache_destroy(ceph_dir_file_cachep);
	kmem_cache_destroy(ceph_mds_request_cachep);
	mempool_destroy(ceph_wb_pagevec_pool);
}

/*
 * Common part of a forced umount: abort outstanding OSD requests with
 * -EIO, force the mds client to unmount and bump the file generation.
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
	ceph_mdsc_force_umount(fsc->mdsc);
	fsc->filp_gen++; // invalidate open files
}

/*
 * ceph_umount_begin - initiate forced umount.  Tear down the
 * mount, skipping steps that may hang while waiting for server(s).
 */
void ceph_umount_begin(struct super_block *sb)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

	dout("ceph_umount_begin - starting forced umount\n");
	if (!fsc)
		return;
	/* mark the client shut down before aborting its requests */
	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
	__ceph_umount_begin(fsc);
}

/* superblock operations installed by ceph_set_super() */
static const struct super_operations ceph_super_ops = {
	.alloc_inode	= ceph_alloc_inode,
	.free_inode	= ceph_free_inode,
	.write_inode    = ceph_write_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= ceph_evict_inode,
	.sync_fs        = ceph_sync_fs,
	.put_super	= ceph_put_super,
	.show_options   = ceph_show_options,
	.statfs		= ceph_statfs,
	.umount_begin   = ceph_umount_begin,
};

/*
 * Bootstrap mount by opening the root directory.  Note the mount
 * @started time from caller, and time out if this takes too long.
996 */ 997 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 998 const char *path, 999 unsigned long started) 1000 { 1001 struct ceph_mds_client *mdsc = fsc->mdsc; 1002 struct ceph_mds_request *req = NULL; 1003 int err; 1004 struct dentry *root; 1005 1006 /* open dir */ 1007 dout("open_root_inode opening '%s'\n", path); 1008 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1009 if (IS_ERR(req)) 1010 return ERR_CAST(req); 1011 req->r_path1 = kstrdup(path, GFP_NOFS); 1012 if (!req->r_path1) { 1013 root = ERR_PTR(-ENOMEM); 1014 goto out; 1015 } 1016 1017 req->r_ino1.ino = CEPH_INO_ROOT; 1018 req->r_ino1.snap = CEPH_NOSNAP; 1019 req->r_started = started; 1020 req->r_timeout = fsc->client->options->mount_timeout; 1021 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1022 req->r_num_caps = 2; 1023 err = ceph_mdsc_do_request(mdsc, NULL, req); 1024 if (err == 0) { 1025 struct inode *inode = req->r_target_inode; 1026 req->r_target_inode = NULL; 1027 dout("open_root_inode success\n"); 1028 root = d_make_root(inode); 1029 if (!root) { 1030 root = ERR_PTR(-ENOMEM); 1031 goto out; 1032 } 1033 dout("open_root_inode success, root dentry is %p\n", root); 1034 } else { 1035 root = ERR_PTR(err); 1036 } 1037 out: 1038 ceph_mdsc_put_request(req); 1039 return root; 1040 } 1041 1042 /* 1043 * mount: join the ceph cluster, and open root directory. 1044 */ 1045 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1046 struct fs_context *fc) 1047 { 1048 int err; 1049 unsigned long started = jiffies; /* note the start time */ 1050 struct dentry *root; 1051 1052 dout("mount start %p\n", fsc); 1053 mutex_lock(&fsc->client->mount_mutex); 1054 1055 if (!fsc->sb->s_root) { 1056 const char *path = fsc->mount_options->server_path ? 
1057 fsc->mount_options->server_path + 1 : ""; 1058 1059 err = __ceph_open_session(fsc->client, started); 1060 if (err < 0) 1061 goto out; 1062 1063 /* setup fscache */ 1064 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1065 err = ceph_fscache_register_fs(fsc, fc); 1066 if (err < 0) 1067 goto out; 1068 } 1069 1070 dout("mount opening path '%s'\n", path); 1071 1072 ceph_fs_debugfs_init(fsc); 1073 1074 root = open_root_dentry(fsc, path, started); 1075 if (IS_ERR(root)) { 1076 err = PTR_ERR(root); 1077 goto out; 1078 } 1079 fsc->sb->s_root = dget(root); 1080 } else { 1081 root = dget(fsc->sb->s_root); 1082 } 1083 1084 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1085 dout("mount success\n"); 1086 mutex_unlock(&fsc->client->mount_mutex); 1087 return root; 1088 1089 out: 1090 mutex_unlock(&fsc->client->mount_mutex); 1091 return ERR_PTR(err); 1092 } 1093 1094 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1095 { 1096 struct ceph_fs_client *fsc = s->s_fs_info; 1097 int ret; 1098 1099 dout("set_super %p\n", s); 1100 1101 s->s_maxbytes = MAX_LFS_FILESIZE; 1102 1103 s->s_xattr = ceph_xattr_handlers; 1104 fsc->sb = s; 1105 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1106 1107 s->s_op = &ceph_super_ops; 1108 s->s_d_op = &ceph_dentry_ops; 1109 s->s_export_op = &ceph_export_ops; 1110 1111 s->s_time_gran = 1; 1112 s->s_time_min = 0; 1113 s->s_time_max = U32_MAX; 1114 1115 ret = set_anon_super_fc(s, fc); 1116 if (ret != 0) 1117 fsc->sb = NULL; 1118 return ret; 1119 } 1120 1121 /* 1122 * share superblock if same fs AND options 1123 */ 1124 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1125 { 1126 struct ceph_fs_client *new = fc->s_fs_info; 1127 struct ceph_mount_options *fsopt = new->mount_options; 1128 struct ceph_options *opt = new->client->options; 1129 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1130 1131 dout("ceph_compare_super %p\n", sb); 1132 1133 if (compare_mount_options(fsopt, 
opt, fsc)) { 1134 dout("monitor(s)/mount options don't match\n"); 1135 return 0; 1136 } /* an fsid is only compared when one was given explicitly (CEPH_OPT_FSID) */ 1137 if ((opt->flags & CEPH_OPT_FSID) && 1138 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1139 dout("fsid doesn't match\n"); 1140 return 0; 1141 } /* SB_BORN is masked out of the live flags before they are compared with the requested ones */ 1142 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1143 dout("flags differ\n"); 1144 return 0; 1145 } 1146 /* a blocklisted client is only shared when CLEANRECOVER is set */ 1147 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1148 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1149 return 0; 1150 } 1151 1152 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1153 dout("client has been forcibly unmounted\n"); 1154 return 0; 1155 } 1156 1157 return 1; 1158 } 1159 1160 /* 1161 * construct our own bdi so we can control readahead, etc. 1162 */ /* bdi_seq - counter that supplies the <n> in each ceph-<n> bdi name */ 1163 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1164 /* ceph_setup_bdi - create the bdi for @sb via super_setup_bdi_name() and derive ra_pages and io_pages from the rasize and rsize mount options of @fsc, shifted right by PAGE_SHIFT. Returns 0, or the super_setup_bdi_name() error. */ 1165 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1166 { 1167 int err; 1168 1169 err = super_setup_bdi_name(sb, "ceph-%ld", 1170 atomic_long_inc_return(&bdi_seq)); 1171 if (err) 1172 return err; 1173 1174 /* set ra_pages based on rasize mount option? 
*/ 1175 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1176 1177 /* set io_pages based on max osd read size */ 1178 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1179 1180 return 0; 1181 } 1182 /* ceph_get_tree - .get_tree handler of ceph_context_ops. Rejects a context without a source (or, with new_dev_syntax, without a monitor address), creates a ceph_fs_client from the parsed options, obtains a superblock from sget_fc() and mounts it through ceph_real_mount(). Returns 0 with fc->root set, otherwise an error code. */ 1183 static int ceph_get_tree(struct fs_context *fc) 1184 { 1185 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1186 struct ceph_mount_options *fsopt = pctx->opts; 1187 struct super_block *sb; 1188 struct ceph_fs_client *fsc; 1189 struct dentry *res; 1190 int (*compare_super)(struct super_block *, struct fs_context *) = 1191 ceph_compare_super; 1192 int err; 1193 1194 dout("ceph_get_tree\n"); 1195 1196 if (!fc->source) 1197 return invalfc(fc, "No source"); 1198 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1199 return invalfc(fc, "No monitor address"); 1200 1201 /* create client (which we may/may not use) */ 1202 fsc = create_fs_client(pctx->opts, pctx->copts); /* NOTE(review): both pointers are cleared even when create_fs_client() fails, so ceph_free_fc() will not destroy them; presumably create_fs_client() takes ownership in every case - confirm */ 1203 pctx->opts = NULL; 1204 pctx->copts = NULL; 1205 if (IS_ERR(fsc)) { 1206 err = PTR_ERR(fsc); 1207 goto out_final; 1208 } 1209 1210 err = ceph_mdsc_init(fsc); 1211 if (err < 0) 1212 goto out; 1213 /* with NOSHARE no compare callback is passed to sget_fc() */ 1214 if (ceph_test_opt(fsc->client, NOSHARE)) 1215 compare_super = NULL; 1216 /* fc->s_fs_info hands the new client to ceph_compare_super() and is cleared right after the sget_fc() call */ 1217 fc->s_fs_info = fsc; 1218 sb = sget_fc(fc, compare_super, ceph_set_super); 1219 fc->s_fs_info = NULL; 1220 if (IS_ERR(sb)) { 1221 err = PTR_ERR(sb); 1222 goto out; 1223 } 1224 /* the superblock already belongs to another client: destroy the one just created and continue with the existing one; otherwise finish setting up the new one */ 1225 if (ceph_sb_to_client(sb) != fsc) { 1226 destroy_fs_client(fsc); 1227 fsc = ceph_sb_to_client(sb); 1228 dout("get_sb got existing client %p\n", fsc); 1229 } else { 1230 dout("get_sb using new client %p\n", fsc); 1231 err = ceph_setup_bdi(sb, fsc); 1232 if (err < 0) 1233 goto out_splat; 1234 } 1235 1236 res = ceph_real_mount(fsc, fc); 1237 if (IS_ERR(res)) { 1238 err = PTR_ERR(res); 1239 goto out_splat; 1240 } 1241 dout("root %p inode %p ino %llx.%llx\n", res, 1242 d_inode(res), ceph_vinop(d_inode(res))); 1243 fc->root = fsc->sb->s_root; 1244 return 0; 1245 1246 out_splat: 1247 if 
(!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { /* the original error is replaced by -EHOSTUNREACH when the mdsmap reports the cluster as unavailable */ 1248 pr_info("No mds server is up or the cluster is laggy\n"); 1249 err = -EHOSTUNREACH; 1250 } 1251 1252 ceph_mdsc_close_sessions(fsc->mdsc); 1253 deactivate_locked_super(sb); 1254 goto out_final; 1255 1256 out: 1257 destroy_fs_client(fsc); 1258 out_final: 1259 dout("ceph_get_tree fail %d\n", err); 1260 return err; 1261 } 1262 /* ceph_free_fc - .free handler of ceph_context_ops. Destroys the mount options and libceph options still attached to the parse context, then frees the context itself. Does nothing when fc->fs_private is NULL. */ 1263 static void ceph_free_fc(struct fs_context *fc) 1264 { 1265 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1266 1267 if (pctx) { 1268 destroy_mount_options(pctx->opts); 1269 ceph_destroy_options(pctx->copts); 1270 kfree(pctx); 1271 } 1272 } 1273 /* ceph_reconfigure_fc - .reconfigure handler of ceph_context_ops. Copies the ASYNC_DIROPS setting from the newly parsed options onto the live client, replaces the stored mon_addr when strcmp_null() returns non-zero, and calls sync_filesystem() on the superblock. Always returns 0. */ 1274 static int ceph_reconfigure_fc(struct fs_context *fc) 1275 { 1276 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1277 struct ceph_mount_options *fsopt = pctx->opts; 1278 struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); 1279 1280 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1281 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1282 else 1283 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1284 1285 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1286 kfree(fsc->mount_options->mon_addr); /* the string moves to the live options; NOTE(review): the source pointer is presumably cleared so that ceph_free_fc() does not free it as well - confirm */ 1287 fsc->mount_options->mon_addr = fsopt->mon_addr; 1288 fsopt->mon_addr = NULL; 1289 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1290 } 1291 1292 sync_filesystem(fc->root->d_sb); 1293 return 0; 1294 } 1295 /* ceph_context_ops - fs_context operations installed by ceph_init_fs_context() */ 1296 static const struct fs_context_operations ceph_context_ops = { 1297 .free = ceph_free_fc, 1298 .parse_param = ceph_parse_mount_param, 1299 .get_tree = ceph_get_tree, 1300 .reconfigure = ceph_reconfigure_fc, 1301 }; 1302 1303 /* 1304 * Set up the filesystem mount context. 
1305 */ 1306 /* ceph_init_fs_context - .init_fs_context handler of ceph_fs_type. Allocates the parse context with its libceph and mount option structures, fills in the default mount options, and installs the context and ceph_context_ops on @fc. Returns 0 on success or -ENOMEM when an allocation fails. */ static int ceph_init_fs_context(struct fs_context *fc) 1307 { 1308 struct ceph_parse_opts_ctx *pctx; 1309 struct ceph_mount_options *fsopt; 1310 1311 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1312 if (!pctx) 1313 return -ENOMEM; 1314 1315 pctx->copts = ceph_alloc_options(); 1316 if (!pctx->copts) 1317 goto nomem; 1318 1319 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1320 if (!pctx->opts) 1321 goto nomem; 1322 1323 fsopt = pctx->opts; 1324 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1325 1326 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1327 fsopt->rsize = CEPH_MAX_READ_SIZE; 1328 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1329 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1330 if (!fsopt->snapdir_name) 1331 goto nomem; 1332 1333 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1334 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1335 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1336 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1337 fsopt->congestion_kb = default_congestion_kb(); 1338 1339 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1340 fc->sb_flags |= SB_POSIXACL; 1341 #endif 1342 1343 fc->fs_private = pctx; 1344 fc->ops = &ceph_context_ops; 1345 return 0; 1346 1347 nomem: /* pctx was zero-allocated, so members not allocated yet are NULL here; NOTE(review): relies on destroy_mount_options() and ceph_destroy_options() accepting NULL - confirm */ 1348 destroy_mount_options(pctx->opts); 1349 ceph_destroy_options(pctx->copts); 1350 kfree(pctx); 1351 return -ENOMEM; 1352 } 1353 /* ceph_kill_sb - .kill_sb handler of ceph_fs_type. Calls ceph_mdsc_pre_umount() and flush_fs_workqueues() before kill_anon_super(), then clears extra_mon_dispatch, cleans up debugfs, unregisters fscache and destroys the fs client. */ 1354 static void ceph_kill_sb(struct super_block *s) 1355 { 1356 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1357 1358 dout("kill_sb %p\n", s); 1359 1360 ceph_mdsc_pre_umount(fsc->mdsc); 1361 flush_fs_workqueues(fsc); 1362 1363 kill_anon_super(s); 1364 1365 fsc->client->extra_mon_dispatch = NULL; 1366 ceph_fs_debugfs_cleanup(fsc); 1367 1368 ceph_fscache_unregister_fs(fsc); 1369 1370 destroy_fs_client(fsc); 1371 } 1372 /* ceph_fs_type - filesystem type registered by init_ceph() and unregistered by exit_ceph() */ 1373 static struct file_system_type ceph_fs_type = { 1374 .owner = THIS_MODULE, 1375 .name = "ceph", 1376 .init_fs_context = ceph_init_fs_context, 1377 .kill_sb = ceph_kill_sb, 
1378 .fs_flags = FS_RENAME_DOES_D_MOVE, 1379 }; 1380 MODULE_ALIAS_FS("ceph"); 1381 /* ceph_force_reconnect - put the client of @sb through a forced reconnect. Marks the mount CEPH_MOUNT_RECOVER, runs __ceph_umount_begin(), flushes inode_wq, resets the client address, clears the OSD client abort error and the blocklisted flag, and marks the mount CEPH_MOUNT_MOUNTED again. When @sb has a root dentry its attributes are fetched via __ceph_do_getattr(). Returns 0, or the __ceph_do_getattr() result. */ 1382 int ceph_force_reconnect(struct super_block *sb) 1383 { 1384 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1385 int err = 0; 1386 1387 fsc->mount_state = CEPH_MOUNT_RECOVER; 1388 __ceph_umount_begin(fsc); 1389 1390 /* Make sure all page caches get invalidated. 1391 * see remove_session_caps_cb() */ 1392 flush_workqueue(fsc->inode_wq); 1393 1394 /* In case that we were blocklisted. This also resets 1395 * all mon/osd connections */ 1396 ceph_reset_client_addr(fsc->client); 1397 1398 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1399 1400 fsc->blocklisted = false; 1401 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1402 1403 if (sb->s_root) { 1404 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1405 CEPH_STAT_CAP_INODE, true); 1406 } 1407 return err; 1408 } 1409 /* init_ceph - module init. Sets up the caches, calls ceph_flock_init() and registers ceph_fs_type; the caches are destroyed again when registration fails. Returns 0 or the failing call's error. */ 1410 static int __init init_ceph(void) 1411 { 1412 int ret = init_caches(); 1413 if (ret) 1414 goto out; 1415 1416 ceph_flock_init(); 1417 ret = register_filesystem(&ceph_fs_type); 1418 if (ret) 1419 goto out_caches; 1420 1421 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1422 1423 return 0; 1424 1425 out_caches: 1426 destroy_caches(); 1427 out: 1428 return ret; 1429 } 1430 /* exit_ceph - module exit. Unregisters ceph_fs_type and destroys the caches. */ 1431 static void __exit exit_ceph(void) 1432 { 1433 dout("exit_ceph\n"); 1434 unregister_filesystem(&ceph_fs_type); 1435 destroy_caches(); 1436 } 1437 /* param_set_metrics - .set handler of param_ops_metrics. Parses @val with param_set_bool(); on a parse error logs it and returns the error. Otherwise, when disable_send_metrics is false, calls metric_schedule_delayed() for every client on ceph_fsc_list while holding ceph_fsc_lock. Returns 0 on success. */ 1438 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1439 { 1440 struct ceph_fs_client *fsc; 1441 int ret; 1442 1443 ret = param_set_bool(val, kp); 1444 if (ret) { 1445 pr_err("Failed to parse sending metrics switch value '%s'\n", 1446 val); 1447 return ret; 1448 } else if (!disable_send_metrics) { 1449 // wake up all the mds clients 1450 spin_lock(&ceph_fsc_lock); 1451 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1452 metric_schedule_delayed(&fsc->mdsc->metric); 1453 } 1454 spin_unlock(&ceph_fsc_lock); 1455 } 1456 1457 return 0; 1458 } 1459 1460 static 
const struct kernel_param_ops param_ops_metrics = { 1461 .set = param_set_metrics, 1462 .get = param_get_bool, 1463 }; 1464 1465 bool disable_send_metrics = false; 1466 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1467 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1468 1469 /* for both v1 and v2 syntax */ 1470 static bool mount_support = true; 1471 static const struct kernel_param_ops param_ops_mount_syntax = { 1472 .get = param_get_bool, 1473 }; 1474 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1475 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1476 1477 module_init(init_ceph); 1478 module_exit(exit_ceph); 1479 1480 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1481 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1482 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1483 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1484 MODULE_LICENSE("GPL"); 1485