1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 #include <uapi/linux/magic.h> 31 32 static DEFINE_SPINLOCK(ceph_fsc_lock); 33 static LIST_HEAD(ceph_fsc_list); 34 35 /* 36 * Ceph superblock operations 37 * 38 * Handle the basics of mounting, unmounting. 39 */ 40 41 /* 42 * super ops 43 */ 44 static void ceph_put_super(struct super_block *s) 45 { 46 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 47 48 dout("put_super\n"); 49 ceph_mdsc_close_sessions(fsc->mdsc); 50 } 51 52 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 53 { 54 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 55 struct ceph_mon_client *monc = &fsc->client->monc; 56 struct ceph_statfs st; 57 int i, err; 58 u64 data_pool; 59 60 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 61 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 62 } else { 63 data_pool = CEPH_NOPOOL; 64 } 65 66 dout("statfs\n"); 67 err = ceph_monc_do_statfs(monc, data_pool, &st); 68 if (err < 0) 69 return err; 70 71 /* fill in kstatfs */ 72 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 73 74 /* 75 * express utilization in terms of large blocks to avoid 76 * overflow on 32-bit machines. 77 * 78 * NOTE: for the time being, we make bsize == frsize to humor 79 * not-yet-ancient versions of glibc that are broken. 80 * Someday, we will probably want to report a real block 81 * size... whatever that may mean for a network file system! 82 */ 83 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 84 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 85 86 /* 87 * By default use root quota for stats; fallback to overall filesystem 88 * usage if using 'noquotadf' mount option or if the root dir doesn't 89 * have max_bytes quota set. 90 */ 91 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 92 !ceph_quota_update_statfs(fsc, buf)) { 93 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 94 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 95 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 96 } 97 98 buf->f_files = le64_to_cpu(st.num_objects); 99 buf->f_ffree = -1; 100 buf->f_namelen = NAME_MAX; 101 102 /* Must convert the fsid, for consistent values across arches */ 103 buf->f_fsid.val[0] = 0; 104 mutex_lock(&monc->mutex); 105 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 106 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 107 mutex_unlock(&monc->mutex); 108 109 /* fold the fs_cluster_id into the upper bits */ 110 buf->f_fsid.val[1] = monc->fs_cluster_id; 111 112 return 0; 113 } 114 115 static int ceph_sync_fs(struct super_block *sb, int wait) 116 { 117 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 118 119 if (!wait) { 120 dout("sync_fs (non-blocking)\n"); 121 ceph_flush_dirty_caps(fsc->mdsc); 122 dout("sync_fs (non-blocking) done\n"); 123 return 0; 124 } 125 126 dout("sync_fs (blocking)\n"); 127 ceph_osdc_sync(&fsc->client->osdc); 128 ceph_mdsc_sync(fsc->mdsc); 129 dout("sync_fs (blocking) done\n"); 130 return 0; 131 } 132 133 /* 134 * mount options 135 */ 136 enum { 137 Opt_wsize, 138 Opt_rsize, 139 Opt_rasize, 140 Opt_caps_wanted_delay_min, 141 Opt_caps_wanted_delay_max, 142 Opt_caps_max, 143 Opt_readdir_max_entries, 144 Opt_readdir_max_bytes, 145 Opt_congestion_kb, 146 /* int args above */ 147 Opt_snapdirname, 148 Opt_mds_namespace, 149 Opt_recover_session, 150 Opt_source, 151 Opt_mon_addr, 152 /* string args above */ 153 Opt_dirstat, 154 Opt_rbytes, 155 Opt_asyncreaddir, 156 Opt_dcache, 157 Opt_ino32, 158 Opt_fscache, 159 Opt_poolperm, 160 Opt_require_active_mds, 161 Opt_acl, 162 Opt_quotadf, 163 Opt_copyfrom, 164 Opt_wsync, 165 Opt_pagecache, 166 }; 167 168 enum ceph_recover_session_mode { 169 ceph_recover_session_no, 170 ceph_recover_session_clean 171 }; 172 173 static const struct constant_table ceph_param_recover[] = { 174 { "no", ceph_recover_session_no }, 175 { "clean", ceph_recover_session_clean }, 176 {} 177 }; 178 179 static const struct fs_parameter_spec ceph_mount_parameters[] = { 180 fsparam_flag_no ("acl", Opt_acl), 181 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 182 fsparam_s32 ("caps_max", Opt_caps_max), 183 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 184 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 185 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 186 fsparam_flag_no ("copyfrom", Opt_copyfrom), 187 fsparam_flag_no ("dcache", Opt_dcache), 188 fsparam_flag_no ("dirstat", Opt_dirstat), 189 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 190 fsparam_string ("fsc", Opt_fscache), // fsc=... 191 fsparam_flag_no ("ino32", Opt_ino32), 192 fsparam_string ("mds_namespace", Opt_mds_namespace), 193 fsparam_flag_no ("poolperm", Opt_poolperm), 194 fsparam_flag_no ("quotadf", Opt_quotadf), 195 fsparam_u32 ("rasize", Opt_rasize), 196 fsparam_flag_no ("rbytes", Opt_rbytes), 197 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 198 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 199 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 200 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 201 fsparam_u32 ("rsize", Opt_rsize), 202 fsparam_string ("snapdirname", Opt_snapdirname), 203 fsparam_string ("source", Opt_source), 204 fsparam_string ("mon_addr", Opt_mon_addr), 205 fsparam_u32 ("wsize", Opt_wsize), 206 fsparam_flag_no ("wsync", Opt_wsync), 207 fsparam_flag_no ("pagecache", Opt_pagecache), 208 {} 209 }; 210 211 struct ceph_parse_opts_ctx { 212 struct ceph_options *copts; 213 struct ceph_mount_options *opts; 214 }; 215 216 /* 217 * Remove adjacent slashes and then the trailing slash, unless it is 218 * the only remaining character. 219 * 220 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 221 */ 222 static void canonicalize_path(char *path) 223 { 224 int i, j = 0; 225 226 for (i = 0; path[i] != '\0'; i++) { 227 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 228 path[j++] = path[i]; 229 } 230 231 if (j > 1 && path[j - 1] == '/') 232 j--; 233 path[j] = '\0'; 234 } 235 236 /* 237 * Check if the mds namespace in ceph_mount_options matches 238 * the passed in namespace string. First time match (when 239 * ->mds_namespace is NULL) is treated specially, since 240 * ->mds_namespace needs to be initialized by the caller. 241 */ 242 static int namespace_equals(struct ceph_mount_options *fsopt, 243 const char *namespace, size_t len) 244 { 245 return !(fsopt->mds_namespace && 246 (strlen(fsopt->mds_namespace) != len || 247 strncmp(fsopt->mds_namespace, namespace, len))); 248 } 249 250 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 251 struct fs_context *fc) 252 { 253 int r; 254 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 255 struct ceph_mount_options *fsopt = pctx->opts; 256 257 if (*dev_name_end != ':') 258 return invalfc(fc, "separator ':' missing in source"); 259 260 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 261 pctx->copts, fc->log.log, ','); 262 if (r) 263 return r; 264 265 fsopt->new_dev_syntax = false; 266 return 0; 267 } 268 269 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 270 struct fs_context *fc) 271 { 272 size_t len; 273 struct ceph_fsid fsid; 274 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 275 struct ceph_mount_options *fsopt = pctx->opts; 276 char *fsid_start, *fs_name_start; 277 278 if (*dev_name_end != '=') { 279 dout("separator '=' missing in source"); 280 return -EINVAL; 281 } 282 283 fsid_start = strchr(dev_name, '@'); 284 if (!fsid_start) 285 return invalfc(fc, "missing cluster fsid"); 286 ++fsid_start; /* start of cluster fsid */ 287 288 fs_name_start = strchr(fsid_start, '.'); 289 if (!fs_name_start) 290 return invalfc(fc, "missing file system name"); 291 292 if (ceph_parse_fsid(fsid_start, &fsid)) 293 return invalfc(fc, "Invalid FSID"); 294 295 ++fs_name_start; /* start of file system name */ 296 len = dev_name_end - fs_name_start; 297 298 if (!namespace_equals(fsopt, fs_name_start, len)) 299 return invalfc(fc, "Mismatching mds_namespace"); 300 kfree(fsopt->mds_namespace); 301 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 302 if (!fsopt->mds_namespace) 303 return -ENOMEM; 304 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 305 306 fsopt->new_dev_syntax = true; 307 return 0; 308 } 309 310 /* 311 * Parse the source parameter for new device format. Distinguish the device 312 * spec from the path. Try parsing new device format and fallback to old 313 * format if needed. 314 * 315 * New device syntax will looks like: 316 * <device_spec>=/<path> 317 * where 318 * <device_spec> is name@fsid.fsname 319 * <path> is optional, but if present must begin with '/' 320 * (monitor addresses are passed via mount option) 321 * 322 * Old device syntax is: 323 * <server_spec>[,<server_spec>...]:[<path>] 324 * where 325 * <server_spec> is <ip>[:<port>] 326 * <path> is optional, but if present must begin with '/' 327 */ 328 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 329 { 330 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 331 struct ceph_mount_options *fsopt = pctx->opts; 332 char *dev_name = param->string, *dev_name_end; 333 int ret; 334 335 dout("%s '%s'\n", __func__, dev_name); 336 if (!dev_name || !*dev_name) 337 return invalfc(fc, "Empty source"); 338 339 dev_name_end = strchr(dev_name, '/'); 340 if (dev_name_end) { 341 /* 342 * The server_path will include the whole chars from userland 343 * including the leading '/'. 344 */ 345 kfree(fsopt->server_path); 346 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 347 if (!fsopt->server_path) 348 return -ENOMEM; 349 350 canonicalize_path(fsopt->server_path); 351 } else { 352 dev_name_end = dev_name + strlen(dev_name); 353 } 354 355 dev_name_end--; /* back up to separator */ 356 if (dev_name_end < dev_name) 357 return invalfc(fc, "Path missing in source"); 358 359 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 360 if (fsopt->server_path) 361 dout("server path '%s'\n", fsopt->server_path); 362 363 dout("trying new device syntax"); 364 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 365 if (ret) { 366 if (ret != -EINVAL) 367 return ret; 368 dout("trying old device syntax"); 369 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 370 if (ret) 371 return ret; 372 } 373 374 fc->source = param->string; 375 param->string = NULL; 376 return 0; 377 } 378 379 static int ceph_parse_mon_addr(struct fs_parameter *param, 380 struct fs_context *fc) 381 { 382 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 383 struct ceph_mount_options *fsopt = pctx->opts; 384 385 kfree(fsopt->mon_addr); 386 fsopt->mon_addr = param->string; 387 param->string = NULL; 388 389 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 390 pctx->copts, fc->log.log, '/'); 391 } 392 393 static int ceph_parse_mount_param(struct fs_context *fc, 394 struct fs_parameter *param) 395 { 396 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 397 struct ceph_mount_options *fsopt = pctx->opts; 398 struct fs_parse_result result; 399 unsigned int mode; 400 int token, ret; 401 402 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 403 if (ret != -ENOPARAM) 404 return ret; 405 406 token = fs_parse(fc, ceph_mount_parameters, param, &result); 407 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 408 if (token < 0) 409 return token; 410 411 switch (token) { 412 case Opt_snapdirname: 413 kfree(fsopt->snapdir_name); 414 fsopt->snapdir_name = param->string; 415 param->string = NULL; 416 break; 417 case Opt_mds_namespace: 418 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 419 return invalfc(fc, "Mismatching mds_namespace"); 420 kfree(fsopt->mds_namespace); 421 fsopt->mds_namespace = param->string; 422 param->string = NULL; 423 break; 424 case Opt_recover_session: 425 mode = result.uint_32; 426 if (mode == ceph_recover_session_no) 427 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 428 else if (mode == ceph_recover_session_clean) 429 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 430 else 431 BUG(); 432 break; 433 case Opt_source: 434 if (fc->source) 435 return invalfc(fc, "Multiple sources specified"); 436 return ceph_parse_source(param, fc); 437 case Opt_mon_addr: 438 return ceph_parse_mon_addr(param, fc); 439 case Opt_wsize: 440 if (result.uint_32 < PAGE_SIZE || 441 result.uint_32 > CEPH_MAX_WRITE_SIZE) 442 goto out_of_range; 443 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 444 break; 445 case Opt_rsize: 446 if (result.uint_32 < PAGE_SIZE || 447 result.uint_32 > CEPH_MAX_READ_SIZE) 448 goto out_of_range; 449 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 450 break; 451 case Opt_rasize: 452 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_caps_wanted_delay_min: 455 if (result.uint_32 < 1) 456 goto out_of_range; 457 fsopt->caps_wanted_delay_min = result.uint_32; 458 break; 459 case Opt_caps_wanted_delay_max: 460 if (result.uint_32 < 1) 461 goto out_of_range; 462 fsopt->caps_wanted_delay_max = result.uint_32; 463 break; 464 case Opt_caps_max: 465 if (result.int_32 < 0) 466 goto out_of_range; 467 fsopt->caps_max = result.int_32; 468 break; 469 case Opt_readdir_max_entries: 470 if (result.uint_32 < 1) 471 goto out_of_range; 472 fsopt->max_readdir = result.uint_32; 473 break; 474 case Opt_readdir_max_bytes: 475 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 476 goto out_of_range; 477 fsopt->max_readdir_bytes = result.uint_32; 478 break; 479 case Opt_congestion_kb: 480 if (result.uint_32 < 1024) /* at least 1M */ 481 goto out_of_range; 482 fsopt->congestion_kb = result.uint_32; 483 break; 484 case Opt_dirstat: 485 if (!result.negated) 486 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 487 else 488 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 489 break; 490 case Opt_rbytes: 491 if (!result.negated) 492 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 493 else 494 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 495 break; 496 case Opt_asyncreaddir: 497 if (!result.negated) 498 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 499 else 500 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 501 break; 502 case Opt_dcache: 503 if (!result.negated) 504 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 505 else 506 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 507 break; 508 case Opt_ino32: 509 if (!result.negated) 510 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 511 else 512 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 513 break; 514 515 case Opt_fscache: 516 #ifdef CONFIG_CEPH_FSCACHE 517 kfree(fsopt->fscache_uniq); 518 fsopt->fscache_uniq = NULL; 519 if (result.negated) { 520 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 521 } else { 522 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 523 fsopt->fscache_uniq = param->string; 524 param->string = NULL; 525 } 526 break; 527 #else 528 return invalfc(fc, "fscache support is disabled"); 529 #endif 530 case Opt_poolperm: 531 if (!result.negated) 532 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 533 else 534 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 535 break; 536 case Opt_require_active_mds: 537 if (!result.negated) 538 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 539 else 540 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 541 break; 542 case Opt_quotadf: 543 if (!result.negated) 544 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 545 else 546 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 547 break; 548 case Opt_copyfrom: 549 if (!result.negated) 550 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 551 else 552 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 553 break; 554 case Opt_acl: 555 if (!result.negated) { 556 #ifdef CONFIG_CEPH_FS_POSIX_ACL 557 fc->sb_flags |= SB_POSIXACL; 558 #else 559 return invalfc(fc, "POSIX ACL support is disabled"); 560 #endif 561 } else { 562 fc->sb_flags &= ~SB_POSIXACL; 563 } 564 break; 565 case Opt_wsync: 566 if (!result.negated) 567 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 568 else 569 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 570 break; 571 case Opt_pagecache: 572 if (result.negated) 573 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 574 else 575 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 576 break; 577 default: 578 BUG(); 579 } 580 return 0; 581 582 out_of_range: 583 return invalfc(fc, "%s out of range", param->key); 584 } 585 586 static void destroy_mount_options(struct ceph_mount_options *args) 587 { 588 dout("destroy_mount_options %p\n", args); 589 if (!args) 590 return; 591 592 kfree(args->snapdir_name); 593 kfree(args->mds_namespace); 594 kfree(args->server_path); 595 kfree(args->fscache_uniq); 596 kfree(args->mon_addr); 597 kfree(args); 598 } 599 600 static int strcmp_null(const char *s1, const char *s2) 601 { 602 if (!s1 && !s2) 603 return 0; 604 if (s1 && !s2) 605 return -1; 606 if (!s1 && s2) 607 return 1; 608 return strcmp(s1, s2); 609 } 610 611 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 612 struct ceph_options *new_opt, 613 struct ceph_fs_client *fsc) 614 { 615 struct ceph_mount_options *fsopt1 = new_fsopt; 616 struct ceph_mount_options *fsopt2 = fsc->mount_options; 617 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 618 int ret; 619 620 ret = memcmp(fsopt1, fsopt2, ofs); 621 if (ret) 622 return ret; 623 624 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 625 if (ret) 626 return ret; 627 628 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 629 if (ret) 630 return ret; 631 632 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 633 if (ret) 634 return ret; 635 636 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 637 if (ret) 638 return ret; 639 640 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 641 if (ret) 642 return ret; 643 644 return ceph_compare_options(new_opt, fsc->client); 645 } 646 647 /** 648 * ceph_show_options - Show mount options in /proc/mounts 649 * @m: seq_file to write to 650 * @root: root of that (sub)tree 651 */ 652 static int ceph_show_options(struct seq_file *m, struct dentry *root) 653 { 654 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 655 struct ceph_mount_options *fsopt = fsc->mount_options; 656 size_t pos; 657 int ret; 658 659 /* a comma between MNT/MS and client options */ 660 seq_putc(m, ','); 661 pos = m->count; 662 663 ret = ceph_print_client_options(m, fsc->client, false); 664 if (ret) 665 return ret; 666 667 /* retract our comma if no client options */ 668 if (m->count == pos) 669 m->count--; 670 671 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 672 seq_puts(m, ",dirstat"); 673 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 674 seq_puts(m, ",rbytes"); 675 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 676 seq_puts(m, ",noasyncreaddir"); 677 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 678 seq_puts(m, ",nodcache"); 679 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 680 seq_puts(m, ",ino32"); 681 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 682 seq_show_option(m, "fsc", fsopt->fscache_uniq); 683 } 684 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 685 seq_puts(m, ",nopoolperm"); 686 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 687 seq_puts(m, ",noquotadf"); 688 689 #ifdef CONFIG_CEPH_FS_POSIX_ACL 690 if (root->d_sb->s_flags & SB_POSIXACL) 691 seq_puts(m, ",acl"); 692 else 693 seq_puts(m, ",noacl"); 694 #endif 695 696 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 697 seq_puts(m, ",copyfrom"); 698 699 /* dump mds_namespace when old device syntax is in use */ 700 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 701 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 702 703 if (fsopt->mon_addr) 704 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 705 706 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 707 seq_show_option(m, "recover_session", "clean"); 708 709 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 710 seq_puts(m, ",wsync"); 711 712 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 713 seq_puts(m, ",nopagecache"); 714 715 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 716 seq_printf(m, ",wsize=%u", fsopt->wsize); 717 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 718 seq_printf(m, ",rsize=%u", fsopt->rsize); 719 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 720 seq_printf(m, ",rasize=%u", fsopt->rasize); 721 if (fsopt->congestion_kb != default_congestion_kb()) 722 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 723 if (fsopt->caps_max) 724 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 725 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 726 seq_printf(m, ",caps_wanted_delay_min=%u", 727 fsopt->caps_wanted_delay_min); 728 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 729 seq_printf(m, ",caps_wanted_delay_max=%u", 730 fsopt->caps_wanted_delay_max); 731 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 732 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 733 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 734 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 735 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 736 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 737 738 return 0; 739 } 740 741 /* 742 * handle any mon messages the standard library doesn't understand. 743 * return error if we don't either. 744 */ 745 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 746 { 747 struct ceph_fs_client *fsc = client->private; 748 int type = le16_to_cpu(msg->hdr.type); 749 750 switch (type) { 751 case CEPH_MSG_MDS_MAP: 752 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 753 return 0; 754 case CEPH_MSG_FS_MAP_USER: 755 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 756 return 0; 757 default: 758 return -1; 759 } 760 } 761 762 /* 763 * create a new fs client 764 * 765 * Success or not, this function consumes @fsopt and @opt. 766 */ 767 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 768 struct ceph_options *opt) 769 { 770 struct ceph_fs_client *fsc; 771 int err; 772 773 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 774 if (!fsc) { 775 err = -ENOMEM; 776 goto fail; 777 } 778 779 fsc->client = ceph_create_client(opt, fsc); 780 if (IS_ERR(fsc->client)) { 781 err = PTR_ERR(fsc->client); 782 goto fail; 783 } 784 opt = NULL; /* fsc->client now owns this */ 785 786 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 787 ceph_set_opt(fsc->client, ABORT_ON_FULL); 788 789 if (!fsopt->mds_namespace) { 790 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 791 0, true); 792 } else { 793 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 794 0, false); 795 } 796 797 fsc->mount_options = fsopt; 798 799 fsc->sb = NULL; 800 fsc->mount_state = CEPH_MOUNT_MOUNTING; 801 fsc->filp_gen = 1; 802 fsc->have_copy_from2 = true; 803 804 atomic_long_set(&fsc->writeback_count, 0); 805 fsc->write_congested = false; 806 807 err = -ENOMEM; 808 /* 809 * The number of concurrent works can be high but they don't need 810 * to be processed in parallel, limit concurrency. 811 */ 812 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 813 if (!fsc->inode_wq) 814 goto fail_client; 815 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 816 if (!fsc->cap_wq) 817 goto fail_inode_wq; 818 819 spin_lock(&ceph_fsc_lock); 820 list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 821 spin_unlock(&ceph_fsc_lock); 822 823 return fsc; 824 825 fail_inode_wq: 826 destroy_workqueue(fsc->inode_wq); 827 fail_client: 828 ceph_destroy_client(fsc->client); 829 fail: 830 kfree(fsc); 831 if (opt) 832 ceph_destroy_options(opt); 833 destroy_mount_options(fsopt); 834 return ERR_PTR(err); 835 } 836 837 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 838 { 839 flush_workqueue(fsc->inode_wq); 840 flush_workqueue(fsc->cap_wq); 841 } 842 843 static void destroy_fs_client(struct ceph_fs_client *fsc) 844 { 845 dout("destroy_fs_client %p\n", fsc); 846 847 spin_lock(&ceph_fsc_lock); 848 list_del(&fsc->metric_wakeup); 849 spin_unlock(&ceph_fsc_lock); 850 851 ceph_mdsc_destroy(fsc); 852 destroy_workqueue(fsc->inode_wq); 853 destroy_workqueue(fsc->cap_wq); 854 855 destroy_mount_options(fsc->mount_options); 856 857 ceph_destroy_client(fsc->client); 858 859 kfree(fsc); 860 dout("destroy_fs_client %p done\n", fsc); 861 } 862 863 /* 864 * caches 865 */ 866 struct kmem_cache *ceph_inode_cachep; 867 struct kmem_cache *ceph_cap_cachep; 868 struct kmem_cache *ceph_cap_snap_cachep; 869 struct kmem_cache *ceph_cap_flush_cachep; 870 struct kmem_cache *ceph_dentry_cachep; 871 struct kmem_cache *ceph_file_cachep; 872 struct kmem_cache *ceph_dir_file_cachep; 873 struct kmem_cache *ceph_mds_request_cachep; 874 mempool_t *ceph_wb_pagevec_pool; 875 876 static void ceph_inode_init_once(void *foo) 877 { 878 struct ceph_inode_info *ci = foo; 879 inode_init_once(&ci->vfs_inode); 880 } 881 882 static int __init init_caches(void) 883 { 884 int error = -ENOMEM; 885 886 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 887 sizeof(struct ceph_inode_info), 888 __alignof__(struct ceph_inode_info), 889 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 890 SLAB_ACCOUNT, ceph_inode_init_once); 891 if (!ceph_inode_cachep) 892 return -ENOMEM; 893 894 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 895 if (!ceph_cap_cachep) 896 goto bad_cap; 897 ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); 898 if (!ceph_cap_snap_cachep) 899 goto bad_cap_snap; 900 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 901 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 902 if (!ceph_cap_flush_cachep) 903 goto bad_cap_flush; 904 905 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 906 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 907 if (!ceph_dentry_cachep) 908 goto bad_dentry; 909 910 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 911 if (!ceph_file_cachep) 912 goto bad_file; 913 914 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 915 if (!ceph_dir_file_cachep) 916 goto bad_dir_file; 917 918 ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); 919 if (!ceph_mds_request_cachep) 920 goto bad_mds_req; 921 922 ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); 923 if (!ceph_wb_pagevec_pool) 924 goto bad_pagevec_pool; 925 926 return 0; 927 928 bad_pagevec_pool: 929 kmem_cache_destroy(ceph_mds_request_cachep); 930 bad_mds_req: 931 kmem_cache_destroy(ceph_dir_file_cachep); 932 bad_dir_file: 933 kmem_cache_destroy(ceph_file_cachep); 934 bad_file: 935 kmem_cache_destroy(ceph_dentry_cachep); 936 bad_dentry: 937 kmem_cache_destroy(ceph_cap_flush_cachep); 938 bad_cap_flush: 939 kmem_cache_destroy(ceph_cap_snap_cachep); 940 bad_cap_snap: 941 kmem_cache_destroy(ceph_cap_cachep); 942 bad_cap: 943 kmem_cache_destroy(ceph_inode_cachep); 944 return error; 945 } 946 947 static void destroy_caches(void) 948 { 949 /* 950 * Make sure all delayed rcu free inodes are flushed before we 951 * destroy cache. 952 */ 953 rcu_barrier(); 954 955 kmem_cache_destroy(ceph_inode_cachep); 956 kmem_cache_destroy(ceph_cap_cachep); 957 kmem_cache_destroy(ceph_cap_snap_cachep); 958 kmem_cache_destroy(ceph_cap_flush_cachep); 959 kmem_cache_destroy(ceph_dentry_cachep); 960 kmem_cache_destroy(ceph_file_cachep); 961 kmem_cache_destroy(ceph_dir_file_cachep); 962 kmem_cache_destroy(ceph_mds_request_cachep); 963 mempool_destroy(ceph_wb_pagevec_pool); 964 } 965 966 static void __ceph_umount_begin(struct ceph_fs_client *fsc) 967 { 968 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 969 ceph_mdsc_force_umount(fsc->mdsc); 970 fsc->filp_gen++; // invalidate open files 971 } 972 973 /* 974 * ceph_umount_begin - initiate forced umount. Tear down the 975 * mount, skipping steps that may hang while waiting for server(s). 976 */ 977 void ceph_umount_begin(struct super_block *sb) 978 { 979 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 980 981 dout("ceph_umount_begin - starting forced umount\n"); 982 if (!fsc) 983 return; 984 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 985 __ceph_umount_begin(fsc); 986 } 987 988 static const struct super_operations ceph_super_ops = { 989 .alloc_inode = ceph_alloc_inode, 990 .free_inode = ceph_free_inode, 991 .write_inode = ceph_write_inode, 992 .drop_inode = generic_delete_inode, 993 .evict_inode = ceph_evict_inode, 994 .sync_fs = ceph_sync_fs, 995 .put_super = ceph_put_super, 996 .show_options = ceph_show_options, 997 .statfs = ceph_statfs, 998 .umount_begin = ceph_umount_begin, 999 }; 1000 1001 /* 1002 * Bootstrap mount by opening the root directory. Note the mount 1003 * @started time from caller, and time out if this takes too long. 1004 */ 1005 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 1006 const char *path, 1007 unsigned long started) 1008 { 1009 struct ceph_mds_client *mdsc = fsc->mdsc; 1010 struct ceph_mds_request *req = NULL; 1011 int err; 1012 struct dentry *root; 1013 1014 /* open dir */ 1015 dout("open_root_inode opening '%s'\n", path); 1016 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1017 if (IS_ERR(req)) 1018 return ERR_CAST(req); 1019 req->r_path1 = kstrdup(path, GFP_NOFS); 1020 if (!req->r_path1) { 1021 root = ERR_PTR(-ENOMEM); 1022 goto out; 1023 } 1024 1025 req->r_ino1.ino = CEPH_INO_ROOT; 1026 req->r_ino1.snap = CEPH_NOSNAP; 1027 req->r_started = started; 1028 req->r_timeout = fsc->client->options->mount_timeout; 1029 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1030 req->r_num_caps = 2; 1031 err = ceph_mdsc_do_request(mdsc, NULL, req); 1032 if (err == 0) { 1033 struct inode *inode = req->r_target_inode; 1034 req->r_target_inode = NULL; 1035 dout("open_root_inode success\n"); 1036 root = d_make_root(inode); 1037 if (!root) { 1038 root = ERR_PTR(-ENOMEM); 1039 goto out; 1040 } 1041 dout("open_root_inode success, root dentry is %p\n", root); 1042 } else { 1043 root = ERR_PTR(err); 1044 } 1045 out: 1046 ceph_mdsc_put_request(req); 1047 return root; 1048 } 1049 1050 /* 1051 * mount: join the ceph cluster, and open root directory. 1052 */ 1053 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1054 struct fs_context *fc) 1055 { 1056 int err; 1057 unsigned long started = jiffies; /* note the start time */ 1058 struct dentry *root; 1059 1060 dout("mount start %p\n", fsc); 1061 mutex_lock(&fsc->client->mount_mutex); 1062 1063 if (!fsc->sb->s_root) { 1064 const char *path = fsc->mount_options->server_path ? 1065 fsc->mount_options->server_path + 1 : ""; 1066 1067 err = __ceph_open_session(fsc->client, started); 1068 if (err < 0) 1069 goto out; 1070 1071 /* setup fscache */ 1072 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1073 err = ceph_fscache_register_fs(fsc, fc); 1074 if (err < 0) 1075 goto out; 1076 } 1077 1078 dout("mount opening path '%s'\n", path); 1079 1080 ceph_fs_debugfs_init(fsc); 1081 1082 root = open_root_dentry(fsc, path, started); 1083 if (IS_ERR(root)) { 1084 err = PTR_ERR(root); 1085 goto out; 1086 } 1087 fsc->sb->s_root = dget(root); 1088 } else { 1089 root = dget(fsc->sb->s_root); 1090 } 1091 1092 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1093 dout("mount success\n"); 1094 mutex_unlock(&fsc->client->mount_mutex); 1095 return root; 1096 1097 out: 1098 mutex_unlock(&fsc->client->mount_mutex); 1099 return ERR_PTR(err); 1100 } 1101 1102 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1103 { 1104 struct ceph_fs_client *fsc = s->s_fs_info; 1105 int ret; 1106 1107 dout("set_super %p\n", s); 1108 1109 s->s_maxbytes = MAX_LFS_FILESIZE; 1110 1111 s->s_xattr = ceph_xattr_handlers; 1112 fsc->sb = s; 1113 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1114 1115 s->s_op = &ceph_super_ops; 1116 s->s_d_op = &ceph_dentry_ops; 1117 s->s_export_op = &ceph_export_ops; 1118 1119 s->s_time_gran = 1; 1120 s->s_time_min = 0; 1121 s->s_time_max = U32_MAX; 1122 s->s_flags |= SB_NODIRATIME | SB_NOATIME; 1123 1124 ret = set_anon_super_fc(s, fc); 1125 if (ret != 0) 1126 fsc->sb = NULL; 1127 return ret; 1128 } 1129 1130 /* 1131 * share superblock if same fs AND options 1132 */ 1133 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1134 { 1135 struct ceph_fs_client *new = fc->s_fs_info; 1136 struct ceph_mount_options *fsopt = new->mount_options; 1137 struct ceph_options *opt = new->client->options; 1138 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1139 1140 dout("ceph_compare_super %p\n", sb); 1141 1142 if (compare_mount_options(fsopt, opt, fsc)) { 1143 dout("monitor(s)/mount options don't match\n"); 1144 return 0; 1145 } 1146 if ((opt->flags & CEPH_OPT_FSID) && 1147 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1148 dout("fsid doesn't match\n"); 1149 return 0; 1150 } 1151 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1152 dout("flags differ\n"); 1153 return 0; 1154 } 1155 1156 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1157 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1158 return 0; 1159 } 1160 1161 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1162 dout("client has been forcibly unmounted\n"); 1163 return 0; 1164 } 1165 1166 return 1; 1167 } 1168 1169 /* 1170 * construct our own bdi so we can control readahead, etc. 1171 */ 1172 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1173 1174 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1175 { 1176 int err; 1177 1178 err = super_setup_bdi_name(sb, "ceph-%ld", 1179 atomic_long_inc_return(&bdi_seq)); 1180 if (err) 1181 return err; 1182 1183 /* set ra_pages based on rasize mount option? */ 1184 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1185 1186 /* set io_pages based on max osd read size */ 1187 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1188 1189 return 0; 1190 } 1191 1192 static int ceph_get_tree(struct fs_context *fc) 1193 { 1194 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1195 struct ceph_mount_options *fsopt = pctx->opts; 1196 struct super_block *sb; 1197 struct ceph_fs_client *fsc; 1198 struct dentry *res; 1199 int (*compare_super)(struct super_block *, struct fs_context *) = 1200 ceph_compare_super; 1201 int err; 1202 1203 dout("ceph_get_tree\n"); 1204 1205 if (!fc->source) 1206 return invalfc(fc, "No source"); 1207 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1208 return invalfc(fc, "No monitor address"); 1209 1210 /* create client (which we may/may not use) */ 1211 fsc = create_fs_client(pctx->opts, pctx->copts); 1212 pctx->opts = NULL; 1213 pctx->copts = NULL; 1214 if (IS_ERR(fsc)) { 1215 err = PTR_ERR(fsc); 1216 goto out_final; 1217 } 1218 1219 err = ceph_mdsc_init(fsc); 1220 if (err < 0) 1221 goto out; 1222 1223 if (ceph_test_opt(fsc->client, NOSHARE)) 1224 compare_super = NULL; 1225 1226 fc->s_fs_info = fsc; 1227 sb = sget_fc(fc, compare_super, ceph_set_super); 1228 fc->s_fs_info = NULL; 1229 if (IS_ERR(sb)) { 1230 err = PTR_ERR(sb); 1231 goto out; 1232 } 1233 1234 if (ceph_sb_to_client(sb) != fsc) { 1235 destroy_fs_client(fsc); 1236 fsc = ceph_sb_to_client(sb); 1237 dout("get_sb got existing client %p\n", fsc); 1238 } else { 1239 dout("get_sb using new client %p\n", fsc); 1240 err = ceph_setup_bdi(sb, fsc); 1241 if (err < 0) 1242 goto out_splat; 1243 } 1244 1245 res = ceph_real_mount(fsc, fc); 1246 if (IS_ERR(res)) { 1247 err = PTR_ERR(res); 1248 goto out_splat; 1249 } 1250 dout("root %p inode %p ino %llx.%llx\n", res, 1251 d_inode(res), ceph_vinop(d_inode(res))); 1252 fc->root = fsc->sb->s_root; 1253 return 0; 1254 1255 out_splat: 1256 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1257 pr_info("No mds server is up or the cluster is laggy\n"); 1258 err = -EHOSTUNREACH; 1259 } 1260 1261 ceph_mdsc_close_sessions(fsc->mdsc); 1262 deactivate_locked_super(sb); 1263 goto out_final; 1264 1265 out: 1266 destroy_fs_client(fsc); 1267 out_final: 1268 dout("ceph_get_tree fail %d\n", err); 1269 return err; 1270 } 1271 1272 static void ceph_free_fc(struct fs_context *fc) 1273 { 1274 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1275 1276 if (pctx) { 1277 destroy_mount_options(pctx->opts); 1278 ceph_destroy_options(pctx->copts); 1279 kfree(pctx); 1280 } 1281 } 1282 1283 static int ceph_reconfigure_fc(struct fs_context *fc) 1284 { 1285 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1286 struct ceph_mount_options *fsopt = pctx->opts; 1287 struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); 1288 1289 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1290 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1291 else 1292 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1293 1294 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1295 kfree(fsc->mount_options->mon_addr); 1296 fsc->mount_options->mon_addr = fsopt->mon_addr; 1297 fsopt->mon_addr = NULL; 1298 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1299 } 1300 1301 sync_filesystem(fc->root->d_sb); 1302 return 0; 1303 } 1304 1305 static const struct fs_context_operations ceph_context_ops = { 1306 .free = ceph_free_fc, 1307 .parse_param = ceph_parse_mount_param, 1308 .get_tree = ceph_get_tree, 1309 .reconfigure = ceph_reconfigure_fc, 1310 }; 1311 1312 /* 1313 * Set up the filesystem mount context. 1314 */ 1315 static int ceph_init_fs_context(struct fs_context *fc) 1316 { 1317 struct ceph_parse_opts_ctx *pctx; 1318 struct ceph_mount_options *fsopt; 1319 1320 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1321 if (!pctx) 1322 return -ENOMEM; 1323 1324 pctx->copts = ceph_alloc_options(); 1325 if (!pctx->copts) 1326 goto nomem; 1327 1328 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1329 if (!pctx->opts) 1330 goto nomem; 1331 1332 fsopt = pctx->opts; 1333 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1334 1335 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1336 fsopt->rsize = CEPH_MAX_READ_SIZE; 1337 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1338 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1339 if (!fsopt->snapdir_name) 1340 goto nomem; 1341 1342 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1343 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1344 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1345 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1346 fsopt->congestion_kb = default_congestion_kb(); 1347 1348 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1349 fc->sb_flags |= SB_POSIXACL; 1350 #endif 1351 1352 fc->fs_private = pctx; 1353 fc->ops = &ceph_context_ops; 1354 return 0; 1355 1356 nomem: 1357 destroy_mount_options(pctx->opts); 1358 ceph_destroy_options(pctx->copts); 1359 kfree(pctx); 1360 return -ENOMEM; 1361 } 1362 1363 static void ceph_kill_sb(struct super_block *s) 1364 { 1365 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1366 1367 dout("kill_sb %p\n", s); 1368 1369 ceph_mdsc_pre_umount(fsc->mdsc); 1370 flush_fs_workqueues(fsc); 1371 1372 kill_anon_super(s); 1373 1374 fsc->client->extra_mon_dispatch = NULL; 1375 ceph_fs_debugfs_cleanup(fsc); 1376 1377 ceph_fscache_unregister_fs(fsc); 1378 1379 destroy_fs_client(fsc); 1380 } 1381 1382 static struct file_system_type ceph_fs_type = { 1383 .owner = THIS_MODULE, 1384 .name = "ceph", 1385 .init_fs_context = ceph_init_fs_context, 1386 .kill_sb = ceph_kill_sb, 1387 .fs_flags = FS_RENAME_DOES_D_MOVE, 1388 }; 1389 MODULE_ALIAS_FS("ceph"); 1390 1391 int ceph_force_reconnect(struct super_block *sb) 1392 { 1393 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1394 int err = 0; 1395 1396 fsc->mount_state = CEPH_MOUNT_RECOVER; 1397 __ceph_umount_begin(fsc); 1398 1399 /* Make sure all page caches get invalidated. 1400 * see remove_session_caps_cb() */ 1401 flush_workqueue(fsc->inode_wq); 1402 1403 /* In case that we were blocklisted. This also reset 1404 * all mon/osd connections */ 1405 ceph_reset_client_addr(fsc->client); 1406 1407 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1408 1409 fsc->blocklisted = false; 1410 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1411 1412 if (sb->s_root) { 1413 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1414 CEPH_STAT_CAP_INODE, true); 1415 } 1416 return err; 1417 } 1418 1419 static int __init init_ceph(void) 1420 { 1421 int ret = init_caches(); 1422 if (ret) 1423 goto out; 1424 1425 ceph_flock_init(); 1426 ret = register_filesystem(&ceph_fs_type); 1427 if (ret) 1428 goto out_caches; 1429 1430 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1431 1432 return 0; 1433 1434 out_caches: 1435 destroy_caches(); 1436 out: 1437 return ret; 1438 } 1439 1440 static void __exit exit_ceph(void) 1441 { 1442 dout("exit_ceph\n"); 1443 unregister_filesystem(&ceph_fs_type); 1444 destroy_caches(); 1445 } 1446 1447 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1448 { 1449 struct ceph_fs_client *fsc; 1450 int ret; 1451 1452 ret = param_set_bool(val, kp); 1453 if (ret) { 1454 pr_err("Failed to parse sending metrics switch value '%s'\n", 1455 val); 1456 return ret; 1457 } else if (!disable_send_metrics) { 1458 // wake up all the mds clients 1459 spin_lock(&ceph_fsc_lock); 1460 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1461 metric_schedule_delayed(&fsc->mdsc->metric); 1462 } 1463 spin_unlock(&ceph_fsc_lock); 1464 } 1465 1466 return 0; 1467 } 1468 1469 static const struct kernel_param_ops param_ops_metrics = { 1470 .set = param_set_metrics, 1471 .get = param_get_bool, 1472 }; 1473 1474 bool disable_send_metrics = false; 1475 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1476 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1477 1478 /* for both v1 and v2 syntax */ 1479 static bool mount_support = true; 1480 static const struct kernel_param_ops param_ops_mount_syntax = { 1481 .get = param_get_bool, 1482 }; 1483 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1484 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1485 1486 module_init(init_ceph); 1487 module_exit(exit_ceph); 1488 1489 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1490 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1491 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1492 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1493 MODULE_LICENSE("GPL"); 1494