1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 #include "crypto.h" 24 25 #include <linux/ceph/ceph_features.h> 26 #include <linux/ceph/decode.h> 27 #include <linux/ceph/mon_client.h> 28 #include <linux/ceph/auth.h> 29 #include <linux/ceph/debugfs.h> 30 31 #include <uapi/linux/magic.h> 32 33 static DEFINE_SPINLOCK(ceph_fsc_lock); 34 static LIST_HEAD(ceph_fsc_list); 35 36 /* 37 * Ceph superblock operations 38 * 39 * Handle the basics of mounting, unmounting. 40 */ 41 42 /* 43 * super ops 44 */ 45 static void ceph_put_super(struct super_block *s) 46 { 47 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 48 49 dout("put_super\n"); 50 ceph_fscrypt_free_dummy_policy(fsc); 51 ceph_mdsc_close_sessions(fsc->mdsc); 52 } 53 54 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 55 { 56 struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); 57 struct ceph_mon_client *monc = &fsc->client->monc; 58 struct ceph_statfs st; 59 int i, err; 60 u64 data_pool; 61 62 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 63 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 64 } else { 65 data_pool = CEPH_NOPOOL; 66 } 67 68 dout("statfs\n"); 69 err = ceph_monc_do_statfs(monc, data_pool, &st); 70 if (err < 0) 71 return err; 72 73 /* fill in kstatfs */ 74 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 75 76 /* 77 * Express utilization in terms of large blocks to avoid 78 * overflow on 32-bit machines. 
79 */ 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 /* 95 * NOTE: for the time being, we make bsize == frsize to humor 96 * not-yet-ancient versions of glibc that are broken. 97 * Someday, we will probably want to report a real block 98 * size... whatever that may mean for a network file system! 99 */ 100 buf->f_bsize = buf->f_frsize; 101 102 buf->f_files = le64_to_cpu(st.num_objects); 103 buf->f_ffree = -1; 104 buf->f_namelen = NAME_MAX; 105 106 /* Must convert the fsid, for consistent values across arches */ 107 buf->f_fsid.val[0] = 0; 108 mutex_lock(&monc->mutex); 109 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 110 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 111 mutex_unlock(&monc->mutex); 112 113 /* fold the fs_cluster_id into the upper bits */ 114 buf->f_fsid.val[1] = monc->fs_cluster_id; 115 116 return 0; 117 } 118 119 static int ceph_sync_fs(struct super_block *sb, int wait) 120 { 121 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 122 123 if (!wait) { 124 dout("sync_fs (non-blocking)\n"); 125 ceph_flush_dirty_caps(fsc->mdsc); 126 dout("sync_fs (non-blocking) done\n"); 127 return 0; 128 } 129 130 dout("sync_fs (blocking)\n"); 131 ceph_osdc_sync(&fsc->client->osdc); 132 ceph_mdsc_sync(fsc->mdsc); 133 dout("sync_fs (blocking) done\n"); 134 return 0; 135 } 136 137 /* 138 * mount options 139 */ 140 enum { 141 Opt_wsize, 142 Opt_rsize, 143 Opt_rasize, 144 Opt_caps_wanted_delay_min, 145 Opt_caps_wanted_delay_max, 146 
Opt_caps_max, 147 Opt_readdir_max_entries, 148 Opt_readdir_max_bytes, 149 Opt_congestion_kb, 150 /* int args above */ 151 Opt_snapdirname, 152 Opt_mds_namespace, 153 Opt_recover_session, 154 Opt_source, 155 Opt_mon_addr, 156 Opt_test_dummy_encryption, 157 /* string args above */ 158 Opt_dirstat, 159 Opt_rbytes, 160 Opt_asyncreaddir, 161 Opt_dcache, 162 Opt_ino32, 163 Opt_fscache, 164 Opt_poolperm, 165 Opt_require_active_mds, 166 Opt_acl, 167 Opt_quotadf, 168 Opt_copyfrom, 169 Opt_wsync, 170 Opt_pagecache, 171 Opt_sparseread, 172 }; 173 174 enum ceph_recover_session_mode { 175 ceph_recover_session_no, 176 ceph_recover_session_clean 177 }; 178 179 static const struct constant_table ceph_param_recover[] = { 180 { "no", ceph_recover_session_no }, 181 { "clean", ceph_recover_session_clean }, 182 {} 183 }; 184 185 static const struct fs_parameter_spec ceph_mount_parameters[] = { 186 fsparam_flag_no ("acl", Opt_acl), 187 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 188 fsparam_s32 ("caps_max", Opt_caps_max), 189 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 190 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 191 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 192 fsparam_flag_no ("copyfrom", Opt_copyfrom), 193 fsparam_flag_no ("dcache", Opt_dcache), 194 fsparam_flag_no ("dirstat", Opt_dirstat), 195 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 196 fsparam_string ("fsc", Opt_fscache), // fsc=... 
197 fsparam_flag_no ("ino32", Opt_ino32), 198 fsparam_string ("mds_namespace", Opt_mds_namespace), 199 fsparam_string ("mon_addr", Opt_mon_addr), 200 fsparam_flag_no ("poolperm", Opt_poolperm), 201 fsparam_flag_no ("quotadf", Opt_quotadf), 202 fsparam_u32 ("rasize", Opt_rasize), 203 fsparam_flag_no ("rbytes", Opt_rbytes), 204 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 205 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 206 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 207 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 208 fsparam_u32 ("rsize", Opt_rsize), 209 fsparam_string ("snapdirname", Opt_snapdirname), 210 fsparam_string ("source", Opt_source), 211 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption), 212 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption), 213 fsparam_u32 ("wsize", Opt_wsize), 214 fsparam_flag_no ("wsync", Opt_wsync), 215 fsparam_flag_no ("pagecache", Opt_pagecache), 216 fsparam_flag_no ("sparseread", Opt_sparseread), 217 {} 218 }; 219 220 struct ceph_parse_opts_ctx { 221 struct ceph_options *copts; 222 struct ceph_mount_options *opts; 223 }; 224 225 /* 226 * Remove adjacent slashes and then the trailing slash, unless it is 227 * the only remaining character. 228 * 229 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 230 */ 231 static void canonicalize_path(char *path) 232 { 233 int i, j = 0; 234 235 for (i = 0; path[i] != '\0'; i++) { 236 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 237 path[j++] = path[i]; 238 } 239 240 if (j > 1 && path[j - 1] == '/') 241 j--; 242 path[j] = '\0'; 243 } 244 245 /* 246 * Check if the mds namespace in ceph_mount_options matches 247 * the passed in namespace string. First time match (when 248 * ->mds_namespace is NULL) is treated specially, since 249 * ->mds_namespace needs to be initialized by the caller. 
250 */ 251 static int namespace_equals(struct ceph_mount_options *fsopt, 252 const char *namespace, size_t len) 253 { 254 return !(fsopt->mds_namespace && 255 (strlen(fsopt->mds_namespace) != len || 256 strncmp(fsopt->mds_namespace, namespace, len))); 257 } 258 259 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 260 struct fs_context *fc) 261 { 262 int r; 263 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 264 struct ceph_mount_options *fsopt = pctx->opts; 265 266 if (*dev_name_end != ':') 267 return invalfc(fc, "separator ':' missing in source"); 268 269 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 270 pctx->copts, fc->log.log, ','); 271 if (r) 272 return r; 273 274 fsopt->new_dev_syntax = false; 275 return 0; 276 } 277 278 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 279 struct fs_context *fc) 280 { 281 size_t len; 282 struct ceph_fsid fsid; 283 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 284 struct ceph_options *opts = pctx->copts; 285 struct ceph_mount_options *fsopt = pctx->opts; 286 const char *name_start = dev_name; 287 char *fsid_start, *fs_name_start; 288 289 if (*dev_name_end != '=') { 290 dout("separator '=' missing in source"); 291 return -EINVAL; 292 } 293 294 fsid_start = strchr(dev_name, '@'); 295 if (!fsid_start) 296 return invalfc(fc, "missing cluster fsid"); 297 len = fsid_start - name_start; 298 kfree(opts->name); 299 opts->name = kstrndup(name_start, len, GFP_KERNEL); 300 if (!opts->name) 301 return -ENOMEM; 302 dout("using %s entity name", opts->name); 303 304 ++fsid_start; /* start of cluster fsid */ 305 fs_name_start = strchr(fsid_start, '.'); 306 if (!fs_name_start) 307 return invalfc(fc, "missing file system name"); 308 309 if (ceph_parse_fsid(fsid_start, &fsid)) 310 return invalfc(fc, "Invalid FSID"); 311 312 ++fs_name_start; /* start of file system name */ 313 len = dev_name_end - fs_name_start; 314 315 if (!namespace_equals(fsopt, fs_name_start, 
len)) 316 return invalfc(fc, "Mismatching mds_namespace"); 317 kfree(fsopt->mds_namespace); 318 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 319 if (!fsopt->mds_namespace) 320 return -ENOMEM; 321 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 322 323 fsopt->new_dev_syntax = true; 324 return 0; 325 } 326 327 /* 328 * Parse the source parameter for new device format. Distinguish the device 329 * spec from the path. Try parsing new device format and fallback to old 330 * format if needed. 331 * 332 * New device syntax will looks like: 333 * <device_spec>=/<path> 334 * where 335 * <device_spec> is name@fsid.fsname 336 * <path> is optional, but if present must begin with '/' 337 * (monitor addresses are passed via mount option) 338 * 339 * Old device syntax is: 340 * <server_spec>[,<server_spec>...]:[<path>] 341 * where 342 * <server_spec> is <ip>[:<port>] 343 * <path> is optional, but if present must begin with '/' 344 */ 345 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 346 { 347 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 348 struct ceph_mount_options *fsopt = pctx->opts; 349 char *dev_name = param->string, *dev_name_end; 350 int ret; 351 352 dout("%s '%s'\n", __func__, dev_name); 353 if (!dev_name || !*dev_name) 354 return invalfc(fc, "Empty source"); 355 356 dev_name_end = strchr(dev_name, '/'); 357 if (dev_name_end) { 358 /* 359 * The server_path will include the whole chars from userland 360 * including the leading '/'. 
361 */ 362 kfree(fsopt->server_path); 363 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 364 if (!fsopt->server_path) 365 return -ENOMEM; 366 367 canonicalize_path(fsopt->server_path); 368 } else { 369 dev_name_end = dev_name + strlen(dev_name); 370 } 371 372 dev_name_end--; /* back up to separator */ 373 if (dev_name_end < dev_name) 374 return invalfc(fc, "Path missing in source"); 375 376 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 377 if (fsopt->server_path) 378 dout("server path '%s'\n", fsopt->server_path); 379 380 dout("trying new device syntax"); 381 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 382 if (ret) { 383 if (ret != -EINVAL) 384 return ret; 385 dout("trying old device syntax"); 386 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 387 if (ret) 388 return ret; 389 } 390 391 fc->source = param->string; 392 param->string = NULL; 393 return 0; 394 } 395 396 static int ceph_parse_mon_addr(struct fs_parameter *param, 397 struct fs_context *fc) 398 { 399 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 400 struct ceph_mount_options *fsopt = pctx->opts; 401 402 kfree(fsopt->mon_addr); 403 fsopt->mon_addr = param->string; 404 param->string = NULL; 405 406 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 407 pctx->copts, fc->log.log, '/'); 408 } 409 410 static int ceph_parse_mount_param(struct fs_context *fc, 411 struct fs_parameter *param) 412 { 413 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 414 struct ceph_mount_options *fsopt = pctx->opts; 415 struct fs_parse_result result; 416 unsigned int mode; 417 int token, ret; 418 419 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 420 if (ret != -ENOPARAM) 421 return ret; 422 423 token = fs_parse(fc, ceph_mount_parameters, param, &result); 424 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 425 if (token < 0) 426 return token; 427 428 switch (token) { 429 case Opt_snapdirname: 430 
kfree(fsopt->snapdir_name); 431 fsopt->snapdir_name = param->string; 432 param->string = NULL; 433 break; 434 case Opt_mds_namespace: 435 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 436 return invalfc(fc, "Mismatching mds_namespace"); 437 kfree(fsopt->mds_namespace); 438 fsopt->mds_namespace = param->string; 439 param->string = NULL; 440 break; 441 case Opt_recover_session: 442 mode = result.uint_32; 443 if (mode == ceph_recover_session_no) 444 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 445 else if (mode == ceph_recover_session_clean) 446 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 447 else 448 BUG(); 449 break; 450 case Opt_source: 451 if (fc->source) 452 return invalfc(fc, "Multiple sources specified"); 453 return ceph_parse_source(param, fc); 454 case Opt_mon_addr: 455 return ceph_parse_mon_addr(param, fc); 456 case Opt_wsize: 457 if (result.uint_32 < PAGE_SIZE || 458 result.uint_32 > CEPH_MAX_WRITE_SIZE) 459 goto out_of_range; 460 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 461 break; 462 case Opt_rsize: 463 if (result.uint_32 < PAGE_SIZE || 464 result.uint_32 > CEPH_MAX_READ_SIZE) 465 goto out_of_range; 466 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 467 break; 468 case Opt_rasize: 469 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 470 break; 471 case Opt_caps_wanted_delay_min: 472 if (result.uint_32 < 1) 473 goto out_of_range; 474 fsopt->caps_wanted_delay_min = result.uint_32; 475 break; 476 case Opt_caps_wanted_delay_max: 477 if (result.uint_32 < 1) 478 goto out_of_range; 479 fsopt->caps_wanted_delay_max = result.uint_32; 480 break; 481 case Opt_caps_max: 482 if (result.int_32 < 0) 483 goto out_of_range; 484 fsopt->caps_max = result.int_32; 485 break; 486 case Opt_readdir_max_entries: 487 if (result.uint_32 < 1) 488 goto out_of_range; 489 fsopt->max_readdir = result.uint_32; 490 break; 491 case Opt_readdir_max_bytes: 492 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 493 goto out_of_range; 494 
fsopt->max_readdir_bytes = result.uint_32; 495 break; 496 case Opt_congestion_kb: 497 if (result.uint_32 < 1024) /* at least 1M */ 498 goto out_of_range; 499 fsopt->congestion_kb = result.uint_32; 500 break; 501 case Opt_dirstat: 502 if (!result.negated) 503 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 504 else 505 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 506 break; 507 case Opt_rbytes: 508 if (!result.negated) 509 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 510 else 511 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 512 break; 513 case Opt_asyncreaddir: 514 if (!result.negated) 515 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 516 else 517 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 518 break; 519 case Opt_dcache: 520 if (!result.negated) 521 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 522 else 523 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 524 break; 525 case Opt_ino32: 526 if (!result.negated) 527 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 528 else 529 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 530 break; 531 532 case Opt_fscache: 533 #ifdef CONFIG_CEPH_FSCACHE 534 kfree(fsopt->fscache_uniq); 535 fsopt->fscache_uniq = NULL; 536 if (result.negated) { 537 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 538 } else { 539 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 540 fsopt->fscache_uniq = param->string; 541 param->string = NULL; 542 } 543 break; 544 #else 545 return invalfc(fc, "fscache support is disabled"); 546 #endif 547 case Opt_poolperm: 548 if (!result.negated) 549 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 550 else 551 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 552 break; 553 case Opt_require_active_mds: 554 if (!result.negated) 555 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 556 else 557 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 558 break; 559 case Opt_quotadf: 560 if (!result.negated) 561 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 562 else 563 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 564 break; 565 case Opt_copyfrom: 566 if (!result.negated) 567 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 568 
else 569 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 570 break; 571 case Opt_acl: 572 if (!result.negated) { 573 #ifdef CONFIG_CEPH_FS_POSIX_ACL 574 fc->sb_flags |= SB_POSIXACL; 575 #else 576 return invalfc(fc, "POSIX ACL support is disabled"); 577 #endif 578 } else { 579 fc->sb_flags &= ~SB_POSIXACL; 580 } 581 break; 582 case Opt_wsync: 583 if (!result.negated) 584 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 585 else 586 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 587 break; 588 case Opt_pagecache: 589 if (result.negated) 590 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 591 else 592 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 593 break; 594 case Opt_sparseread: 595 if (result.negated) 596 fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD; 597 else 598 fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD; 599 break; 600 case Opt_test_dummy_encryption: 601 #ifdef CONFIG_FS_ENCRYPTION 602 fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy); 603 ret = fscrypt_parse_test_dummy_encryption(param, 604 &fsopt->dummy_enc_policy); 605 if (ret == -EINVAL) { 606 warnfc(fc, "Value of option \"%s\" is unrecognized", 607 param->key); 608 } else if (ret == -EEXIST) { 609 warnfc(fc, "Conflicting test_dummy_encryption options"); 610 ret = -EINVAL; 611 } 612 #else 613 warnfc(fc, 614 "FS encryption not supported: test_dummy_encryption mount option ignored"); 615 #endif 616 break; 617 default: 618 BUG(); 619 } 620 return 0; 621 622 out_of_range: 623 return invalfc(fc, "%s out of range", param->key); 624 } 625 626 static void destroy_mount_options(struct ceph_mount_options *args) 627 { 628 dout("destroy_mount_options %p\n", args); 629 if (!args) 630 return; 631 632 kfree(args->snapdir_name); 633 kfree(args->mds_namespace); 634 kfree(args->server_path); 635 kfree(args->fscache_uniq); 636 kfree(args->mon_addr); 637 fscrypt_free_dummy_policy(&args->dummy_enc_policy); 638 kfree(args); 639 } 640 641 static int strcmp_null(const char *s1, const char *s2) 642 { 643 if (!s1 && !s2) 644 return 0; 645 if (s1 && 
!s2) 646 return -1; 647 if (!s1 && s2) 648 return 1; 649 return strcmp(s1, s2); 650 } 651 652 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 653 struct ceph_options *new_opt, 654 struct ceph_fs_client *fsc) 655 { 656 struct ceph_mount_options *fsopt1 = new_fsopt; 657 struct ceph_mount_options *fsopt2 = fsc->mount_options; 658 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 659 int ret; 660 661 ret = memcmp(fsopt1, fsopt2, ofs); 662 if (ret) 663 return ret; 664 665 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 666 if (ret) 667 return ret; 668 669 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 670 if (ret) 671 return ret; 672 673 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 674 if (ret) 675 return ret; 676 677 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 678 if (ret) 679 return ret; 680 681 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 682 if (ret) 683 return ret; 684 685 return ceph_compare_options(new_opt, fsc->client); 686 } 687 688 /** 689 * ceph_show_options - Show mount options in /proc/mounts 690 * @m: seq_file to write to 691 * @root: root of that (sub)tree 692 */ 693 static int ceph_show_options(struct seq_file *m, struct dentry *root) 694 { 695 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); 696 struct ceph_mount_options *fsopt = fsc->mount_options; 697 size_t pos; 698 int ret; 699 700 /* a comma between MNT/MS and client options */ 701 seq_putc(m, ','); 702 pos = m->count; 703 704 ret = ceph_print_client_options(m, fsc->client, false); 705 if (ret) 706 return ret; 707 708 /* retract our comma if no client options */ 709 if (m->count == pos) 710 m->count--; 711 712 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 713 seq_puts(m, ",dirstat"); 714 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 715 seq_puts(m, ",rbytes"); 716 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 717 seq_puts(m, ",noasyncreaddir"); 718 if ((fsopt->flags & 
CEPH_MOUNT_OPT_DCACHE) == 0) 719 seq_puts(m, ",nodcache"); 720 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 721 seq_puts(m, ",ino32"); 722 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 723 seq_show_option(m, "fsc", fsopt->fscache_uniq); 724 } 725 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 726 seq_puts(m, ",nopoolperm"); 727 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 728 seq_puts(m, ",noquotadf"); 729 730 #ifdef CONFIG_CEPH_FS_POSIX_ACL 731 if (root->d_sb->s_flags & SB_POSIXACL) 732 seq_puts(m, ",acl"); 733 else 734 seq_puts(m, ",noacl"); 735 #endif 736 737 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 738 seq_puts(m, ",copyfrom"); 739 740 /* dump mds_namespace when old device syntax is in use */ 741 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 742 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 743 744 if (fsopt->mon_addr) 745 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 746 747 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 748 seq_show_option(m, "recover_session", "clean"); 749 750 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 751 seq_puts(m, ",wsync"); 752 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 753 seq_puts(m, ",nopagecache"); 754 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 755 seq_puts(m, ",sparseread"); 756 757 fscrypt_show_test_dummy_encryption(m, ',', root->d_sb); 758 759 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 760 seq_printf(m, ",wsize=%u", fsopt->wsize); 761 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 762 seq_printf(m, ",rsize=%u", fsopt->rsize); 763 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 764 seq_printf(m, ",rasize=%u", fsopt->rasize); 765 if (fsopt->congestion_kb != default_congestion_kb()) 766 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 767 if (fsopt->caps_max) 768 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 769 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 770 seq_printf(m, ",caps_wanted_delay_min=%u", 771 fsopt->caps_wanted_delay_min); 772 if 
(fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 773 seq_printf(m, ",caps_wanted_delay_max=%u", 774 fsopt->caps_wanted_delay_max); 775 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 776 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 777 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 778 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 779 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 780 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 781 782 return 0; 783 } 784 785 /* 786 * handle any mon messages the standard library doesn't understand. 787 * return error if we don't either. 788 */ 789 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 790 { 791 struct ceph_fs_client *fsc = client->private; 792 int type = le16_to_cpu(msg->hdr.type); 793 794 switch (type) { 795 case CEPH_MSG_MDS_MAP: 796 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 797 return 0; 798 case CEPH_MSG_FS_MAP_USER: 799 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 800 return 0; 801 default: 802 return -1; 803 } 804 } 805 806 /* 807 * create a new fs client 808 * 809 * Success or not, this function consumes @fsopt and @opt. 
 */
/*
 * Returns the new client, or an ERR_PTR: -ENOMEM when an allocation or a
 * workqueue creation fails, otherwise the error from ceph_create_client().
 * On success the client is linked onto ceph_fsc_list.
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
					struct ceph_options *opt)
{
	struct ceph_fs_client *fsc;
	int err;

	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
	if (!fsc) {
		err = -ENOMEM;
		goto fail;
	}

	fsc->client = ceph_create_client(opt, fsc);
	if (IS_ERR(fsc->client)) {
		err = PTR_ERR(fsc->client);
		goto fail;
	}
	opt = NULL; /* fsc->client now owns this */

	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
	ceph_set_opt(fsc->client, ABORT_ON_FULL);

	/* subscribe to the MDS map directly, or to the FS map when a namespace is named */
	if (!fsopt->mds_namespace) {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
				   0, true);
	} else {
		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
				   0, false);
	}

	fsc->mount_options = fsopt;

	fsc->sb = NULL;
	fsc->mount_state = CEPH_MOUNT_MOUNTING;
	fsc->filp_gen = 1;
	fsc->have_copy_from2 = true;

	atomic_long_set(&fsc->writeback_count, 0);
	fsc->write_congested = false;

	/* both workqueue allocations below fail with -ENOMEM */
	err = -ENOMEM;
	/*
	 * The number of concurrent works can be high but they don't need
	 * to be processed in parallel, limit concurrency.
	 */
	fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
	if (!fsc->inode_wq)
		goto fail_client;
	fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
	if (!fsc->cap_wq)
		goto fail_inode_wq;

	hash_init(fsc->async_unlink_conflict);
	spin_lock_init(&fsc->async_unlink_conflict_lock);

	spin_lock(&ceph_fsc_lock);
	list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
	spin_unlock(&ceph_fsc_lock);

	return fsc;

fail_inode_wq:
	destroy_workqueue(fsc->inode_wq);
fail_client:
	ceph_destroy_client(fsc->client);
fail:
	kfree(fsc);
	/* opt is non-NULL only if it was never handed to the client */
	if (opt)
		ceph_destroy_options(opt);
	destroy_mount_options(fsopt);
	return ERR_PTR(err);
}

/* Wait for all work queued on the inode and cap workqueues to finish. */
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
	flush_workqueue(fsc->inode_wq);
	flush_workqueue(fsc->cap_wq);
}

/*
 * Tear down @fsc: unlink it from ceph_fsc_list, destroy the MDS client,
 * both workqueues, the mount options and the libceph client, then free
 * @fsc itself.
 */
static void destroy_fs_client(struct ceph_fs_client *fsc)
{
	dout("destroy_fs_client %p\n", fsc);

	spin_lock(&ceph_fsc_lock);
	list_del(&fsc->metric_wakeup);
	spin_unlock(&ceph_fsc_lock);

	ceph_mdsc_destroy(fsc);
	destroy_workqueue(fsc->inode_wq);
	destroy_workqueue(fsc->cap_wq);

	destroy_mount_options(fsc->mount_options);

	ceph_destroy_client(fsc->client);

	kfree(fsc);
	/* only the pointer value is printed here; fsc is not dereferenced */
	dout("destroy_fs_client %p done\n", fsc);
}

/*
 * caches
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_snap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep;
struct kmem_cache *ceph_mds_request_cachep;
mempool_t *ceph_wb_pagevec_pool;

/* Slab constructor for ceph_inode_cachep: initialise the embedded VFS inode. */
static void ceph_inode_init_once(void *foo)
{
	struct ceph_inode_info *ci = foo;
	inode_init_once(&ci->netfs.inode);
}

/*
 * Create every slab cache declared above plus the writeback pagevec
 * mempool.
 *
 * Returns 0 on success or -ENOMEM; on failure whatever was created so far
 * is destroyed again, in reverse order of creation.
 */
static int __init init_caches(void)
{
	int error = -ENOMEM;

	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
				      SLAB_ACCOUNT, ceph_inode_init_once);
	if (!ceph_inode_cachep)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
	if (!ceph_cap_cachep)
		goto bad_cap;
	ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD);
	if (!ceph_cap_snap_cachep)
		goto bad_cap_snap;
	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
					   SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_cap_flush_cachep)
		goto bad_cap_flush;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (!ceph_dentry_cachep)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
	if (!ceph_file_cachep)
		goto bad_file;

	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
	if (!ceph_dir_file_cachep)
		goto bad_dir_file;

	ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD);
	if (!ceph_mds_request_cachep)
		goto bad_mds_req;

	/* 10 reserved elements, each sized for a maximal write's page-pointer array */
	ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
	    (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
	if (!ceph_wb_pagevec_pool)
		goto bad_pagevec_pool;

	return 0;

	/* each label destroys the cache created just before the failing step */
bad_pagevec_pool:
	kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
	kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
	kmem_cache_destroy(ceph_file_cachep);
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
	kmem_cache_destroy(ceph_cap_snap_cachep);
bad_cap_snap:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return error;
}

/* Destroy every cache and the mempool created by init_caches(). */
static void destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();

	kmem_cache_destroy(ceph_inode_cachep);
	kmem_cache_destroy(ceph_cap_cachep);
	kmem_cache_destroy(ceph_cap_snap_cachep);
	kmem_cache_destroy(ceph_cap_flush_cachep);
	kmem_cache_destroy(ceph_dentry_cachep);
	kmem_cache_destroy(ceph_file_cachep);
	kmem_cache_destroy(ceph_dir_file_cachep);
	kmem_cache_destroy(ceph_mds_request_cachep);
	mempool_destroy(ceph_wb_pagevec_pool);
}

/*
 * Forced-umount work shared by ceph_umount_begin(): abort outstanding OSD
 * requests with -EIO, force the MDS client to unmount, and bump filp_gen.
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
	ceph_mdsc_force_umount(fsc->mdsc);
	fsc->filp_gen++; // invalidate open files
}

/*
 * ceph_umount_begin - initiate forced umount.  Tear down the
 * mount, skipping steps that may hang while waiting for server(s).
 *
 * Does nothing when @sb has no fs client attached.
 */
void ceph_umount_begin(struct super_block *sb)
{
	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);

	dout("ceph_umount_begin - starting forced umount\n");
	if (!fsc)
		return;
	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
	__ceph_umount_begin(fsc);
}

/* Superblock operations installed on every ceph superblock by ceph_set_super(). */
static const struct super_operations ceph_super_ops = {
	.alloc_inode	= ceph_alloc_inode,
	.free_inode	= ceph_free_inode,
	.write_inode    = ceph_write_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= ceph_evict_inode,
	.sync_fs        = ceph_sync_fs,
	.put_super	= ceph_put_super,
	.show_options   = ceph_show_options,
	.statfs		= ceph_statfs,
	.umount_begin   = ceph_umount_begin,
};

/*
 * Bootstrap mount by opening the root directory.  Note the mount
 * @started time from caller, and time out if this takes too long.
 */
/*
 * Sends a GETATTR request for @path relative to CEPH_INO_ROOT and turns
 * the resulting inode into a root dentry.
 *
 * Returns the dentry, or an ERR_PTR: the request error, or -ENOMEM when
 * the path copy or d_make_root() fails.
 */
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
				       const char *path,
				       unsigned long started)
{
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req = NULL;
	int err;
	struct dentry *root;

	/* open dir */
	dout("open_root_inode opening '%s'\n", path);
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);
	req->r_path1 = kstrdup(path, GFP_NOFS);
	if (!req->r_path1) {
		root = ERR_PTR(-ENOMEM);
		goto out;
	}

	req->r_ino1.ino = CEPH_INO_ROOT;
	req->r_ino1.snap = CEPH_NOSNAP;
	req->r_started = started;
	req->r_timeout = fsc->client->options->mount_timeout;
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
	req->r_num_caps = 2;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	if (err == 0) {
		/* take the inode out of the request before handing it to d_make_root() */
		struct inode *inode = req->r_target_inode;
		req->r_target_inode = NULL;
		dout("open_root_inode success\n");
		root = d_make_root(inode);
		if (!root) {
			root = ERR_PTR(-ENOMEM);
			goto out;
		}
		dout("open_root_inode success, root dentry is %p\n", root);
	} else {
		root = ERR_PTR(err);
	}
out:
	ceph_mdsc_put_request(req);
	return root;
}

#ifdef CONFIG_FS_ENCRYPTION
/*
 * Move the dummy encryption policy parsed into @fsopt onto the fs client
 * of @sb.
 *
 * Returns 0 when no policy was requested, when the requested policy equals
 * the one already installed, or after installing it; -EINVAL when the
 * request would set a policy on remount or conflicts with the installed
 * one.
 */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	struct ceph_fs_client *fsc = sb->s_fs_info;

	if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
		return 0;

	/* No changing encryption context on remount. */
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
	    !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Can't set test_dummy_encryption on remount");
		return -EINVAL;
	}

	/* Also make sure fsopt doesn't contain a conflicting value. */
	if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Conflicting test_dummy_encryption options");
		return -EINVAL;
	}

	/* hand the policy over to fsc and clear the copy left in fsopt */
	fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
	memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));

	warnfc(fc, "test_dummy_encryption mode enabled");
	return 0;
}
#else
/* Without CONFIG_FS_ENCRYPTION there is nothing to apply. */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	return 0;
}
#endif

/*
 * mount: join the ceph cluster, and open root directory.
 *
 * Runs under the client's mount_mutex.  When the superblock has no root
 * yet, the session is opened, fscache is registered if requested, the
 * dummy encryption policy is applied and the root dentry is opened;
 * otherwise the existing root is reused.
 *
 * Returns a referenced root dentry, or an ERR_PTR on failure.
 */
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
				      struct fs_context *fc)
{
	int err;
	unsigned long started = jiffies;  /* note the start time */
	struct dentry *root;

	dout("mount start %p\n", fsc);
	mutex_lock(&fsc->client->mount_mutex);

	if (!fsc->sb->s_root) {
		/* skip the first character of server_path; no path means "" */
		const char *path = fsc->mount_options->server_path ?
				     fsc->mount_options->server_path + 1 : "";

		err = __ceph_open_session(fsc->client, started);
		if (err < 0)
			goto out;

		/* setup fscache */
		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
			err = ceph_fscache_register_fs(fsc, fc);
			if (err < 0)
				goto out;
		}

		err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
						       fsc->mount_options);
		if (err)
			goto out;

		dout("mount opening path '%s'\n", path);

		ceph_fs_debugfs_init(fsc);

		root = open_root_dentry(fsc, path, started);
		if (IS_ERR(root)) {
			err = PTR_ERR(root);
			goto out;
		}
		/* s_root takes its own reference; the caller keeps the original */
		fsc->sb->s_root = dget(root);
	} else {
		root = dget(fsc->sb->s_root);
	}

	fsc->mount_state = CEPH_MOUNT_MOUNTED;
	dout("mount success\n");
	mutex_unlock(&fsc->client->mount_mutex);
	return root;

out:
	mutex_unlock(&fsc->client->mount_mutex);
	ceph_fscrypt_free_dummy_policy(fsc);
	return ERR_PTR(err);
}

/*
 * Initialise a new superblock @s for the fs client in s->s_fs_info:
 * limits, xattr handlers, operation tables, timestamp range and flags.
 *
 * Returns the result of set_anon_super_fc(); on failure the client's
 * back-pointer to @s is cleared again.
 */
static int ceph_set_super(struct super_block *s, struct fs_context *fc)
{
	struct ceph_fs_client *fsc = s->s_fs_info;
	int ret;

	dout("set_super %p\n", s);

	s->s_maxbytes = MAX_LFS_FILESIZE;

	s->s_xattr = ceph_xattr_handlers;
	fsc->sb = s;
	fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */

	s->s_op = &ceph_super_ops;
	s->s_d_op = &ceph_dentry_ops;
	s->s_export_op = &ceph_export_ops;

	s->s_time_gran = 1;
	s->s_time_min = 0;
	s->s_time_max = U32_MAX;
	s->s_flags |= SB_NODIRATIME | SB_NOATIME;

	ceph_fscrypt_set_ops(s);

	ret = set_anon_super_fc(s, fc);
	if (ret != 0)
		fsc->sb = NULL;
	return ret;
}

/*
 * share superblock if same fs AND options
 */
static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
{
	struct ceph_fs_client *new = fc->s_fs_info;
	struct
ceph_mount_options *fsopt = new->mount_options; 1237 struct ceph_options *opt = new->client->options; 1238 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1239 1240 dout("ceph_compare_super %p\n", sb); 1241 1242 if (compare_mount_options(fsopt, opt, fsc)) { 1243 dout("monitor(s)/mount options don't match\n"); 1244 return 0; 1245 } 1246 if ((opt->flags & CEPH_OPT_FSID) && 1247 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1248 dout("fsid doesn't match\n"); 1249 return 0; 1250 } 1251 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1252 dout("flags differ\n"); 1253 return 0; 1254 } 1255 1256 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1257 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1258 return 0; 1259 } 1260 1261 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1262 dout("client has been forcibly unmounted\n"); 1263 return 0; 1264 } 1265 1266 return 1; 1267 } 1268 1269 /* 1270 * construct our own bdi so we can control readahead, etc. 1271 */ 1272 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1273 1274 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1275 { 1276 int err; 1277 1278 err = super_setup_bdi_name(sb, "ceph-%ld", 1279 atomic_long_inc_return(&bdi_seq)); 1280 if (err) 1281 return err; 1282 1283 /* set ra_pages based on rasize mount option? 
*/ 1284 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1285 1286 /* set io_pages based on max osd read size */ 1287 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1288 1289 return 0; 1290 } 1291 1292 static int ceph_get_tree(struct fs_context *fc) 1293 { 1294 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1295 struct ceph_mount_options *fsopt = pctx->opts; 1296 struct super_block *sb; 1297 struct ceph_fs_client *fsc; 1298 struct dentry *res; 1299 int (*compare_super)(struct super_block *, struct fs_context *) = 1300 ceph_compare_super; 1301 int err; 1302 1303 dout("ceph_get_tree\n"); 1304 1305 if (!fc->source) 1306 return invalfc(fc, "No source"); 1307 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1308 return invalfc(fc, "No monitor address"); 1309 1310 /* create client (which we may/may not use) */ 1311 fsc = create_fs_client(pctx->opts, pctx->copts); 1312 pctx->opts = NULL; 1313 pctx->copts = NULL; 1314 if (IS_ERR(fsc)) { 1315 err = PTR_ERR(fsc); 1316 goto out_final; 1317 } 1318 1319 err = ceph_mdsc_init(fsc); 1320 if (err < 0) 1321 goto out; 1322 1323 if (ceph_test_opt(fsc->client, NOSHARE)) 1324 compare_super = NULL; 1325 1326 fc->s_fs_info = fsc; 1327 sb = sget_fc(fc, compare_super, ceph_set_super); 1328 fc->s_fs_info = NULL; 1329 if (IS_ERR(sb)) { 1330 err = PTR_ERR(sb); 1331 goto out; 1332 } 1333 1334 if (ceph_sb_to_fs_client(sb) != fsc) { 1335 destroy_fs_client(fsc); 1336 fsc = ceph_sb_to_fs_client(sb); 1337 dout("get_sb got existing client %p\n", fsc); 1338 } else { 1339 dout("get_sb using new client %p\n", fsc); 1340 err = ceph_setup_bdi(sb, fsc); 1341 if (err < 0) 1342 goto out_splat; 1343 } 1344 1345 res = ceph_real_mount(fsc, fc); 1346 if (IS_ERR(res)) { 1347 err = PTR_ERR(res); 1348 goto out_splat; 1349 } 1350 dout("root %p inode %p ino %llx.%llx\n", res, 1351 d_inode(res), ceph_vinop(d_inode(res))); 1352 fc->root = fsc->sb->s_root; 1353 return 0; 1354 1355 out_splat: 1356 if 
(!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1357 pr_info("No mds server is up or the cluster is laggy\n"); 1358 err = -EHOSTUNREACH; 1359 } 1360 1361 ceph_mdsc_close_sessions(fsc->mdsc); 1362 deactivate_locked_super(sb); 1363 goto out_final; 1364 1365 out: 1366 destroy_fs_client(fsc); 1367 out_final: 1368 dout("ceph_get_tree fail %d\n", err); 1369 return err; 1370 } 1371 1372 static void ceph_free_fc(struct fs_context *fc) 1373 { 1374 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1375 1376 if (pctx) { 1377 destroy_mount_options(pctx->opts); 1378 ceph_destroy_options(pctx->copts); 1379 kfree(pctx); 1380 } 1381 } 1382 1383 static int ceph_reconfigure_fc(struct fs_context *fc) 1384 { 1385 int err; 1386 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1387 struct ceph_mount_options *fsopt = pctx->opts; 1388 struct super_block *sb = fc->root->d_sb; 1389 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1390 1391 err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); 1392 if (err) 1393 return err; 1394 1395 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1396 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1397 else 1398 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1399 1400 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 1401 ceph_set_mount_opt(fsc, SPARSEREAD); 1402 else 1403 ceph_clear_mount_opt(fsc, SPARSEREAD); 1404 1405 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1406 kfree(fsc->mount_options->mon_addr); 1407 fsc->mount_options->mon_addr = fsopt->mon_addr; 1408 fsopt->mon_addr = NULL; 1409 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1410 } 1411 1412 sync_filesystem(sb); 1413 return 0; 1414 } 1415 1416 static const struct fs_context_operations ceph_context_ops = { 1417 .free = ceph_free_fc, 1418 .parse_param = ceph_parse_mount_param, 1419 .get_tree = ceph_get_tree, 1420 .reconfigure = ceph_reconfigure_fc, 1421 }; 1422 1423 /* 1424 * Set up the filesystem mount context. 
1425 */ 1426 static int ceph_init_fs_context(struct fs_context *fc) 1427 { 1428 struct ceph_parse_opts_ctx *pctx; 1429 struct ceph_mount_options *fsopt; 1430 1431 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1432 if (!pctx) 1433 return -ENOMEM; 1434 1435 pctx->copts = ceph_alloc_options(); 1436 if (!pctx->copts) 1437 goto nomem; 1438 1439 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1440 if (!pctx->opts) 1441 goto nomem; 1442 1443 fsopt = pctx->opts; 1444 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1445 1446 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1447 fsopt->rsize = CEPH_MAX_READ_SIZE; 1448 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1449 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1450 if (!fsopt->snapdir_name) 1451 goto nomem; 1452 1453 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1454 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1455 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1456 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1457 fsopt->congestion_kb = default_congestion_kb(); 1458 1459 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1460 fc->sb_flags |= SB_POSIXACL; 1461 #endif 1462 1463 fc->fs_private = pctx; 1464 fc->ops = &ceph_context_ops; 1465 return 0; 1466 1467 nomem: 1468 destroy_mount_options(pctx->opts); 1469 ceph_destroy_options(pctx->copts); 1470 kfree(pctx); 1471 return -ENOMEM; 1472 } 1473 1474 /* 1475 * Return true if it successfully increases the blocker counter, 1476 * or false if the mdsc is in stopping and flushed state. 
1477 */ 1478 static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc) 1479 { 1480 spin_lock(&mdsc->stopping_lock); 1481 if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) { 1482 spin_unlock(&mdsc->stopping_lock); 1483 return false; 1484 } 1485 atomic_inc(&mdsc->stopping_blockers); 1486 spin_unlock(&mdsc->stopping_lock); 1487 return true; 1488 } 1489 1490 static void __dec_stopping_blocker(struct ceph_mds_client *mdsc) 1491 { 1492 spin_lock(&mdsc->stopping_lock); 1493 if (!atomic_dec_return(&mdsc->stopping_blockers) && 1494 mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) 1495 complete_all(&mdsc->stopping_waiter); 1496 spin_unlock(&mdsc->stopping_lock); 1497 } 1498 1499 /* For metadata IO requests */ 1500 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc, 1501 struct ceph_mds_session *session) 1502 { 1503 mutex_lock(&session->s_mutex); 1504 inc_session_sequence(session); 1505 mutex_unlock(&session->s_mutex); 1506 1507 return __inc_stopping_blocker(mdsc); 1508 } 1509 1510 void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc) 1511 { 1512 __dec_stopping_blocker(mdsc); 1513 } 1514 1515 /* For data IO requests */ 1516 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1517 { 1518 return __inc_stopping_blocker(mdsc); 1519 } 1520 1521 void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1522 { 1523 __dec_stopping_blocker(mdsc); 1524 } 1525 1526 static void ceph_kill_sb(struct super_block *s) 1527 { 1528 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 1529 struct ceph_mds_client *mdsc = fsc->mdsc; 1530 bool wait; 1531 1532 dout("kill_sb %p\n", s); 1533 1534 ceph_mdsc_pre_umount(mdsc); 1535 flush_fs_workqueues(fsc); 1536 1537 /* 1538 * Though the kill_anon_super() will finally trigger the 1539 * sync_filesystem() anyway, we still need to do it here and 1540 * then bump the stage of shutdown. 
This will allow us to 1541 * drop any further message, which will increase the inodes' 1542 * i_count reference counters but makes no sense any more, 1543 * from MDSs. 1544 * 1545 * Without this when evicting the inodes it may fail in the 1546 * kill_anon_super(), which will trigger a warning when 1547 * destroying the fscrypt keyring and then possibly trigger 1548 * a further crash in ceph module when the iput() tries to 1549 * evict the inodes later. 1550 */ 1551 sync_filesystem(s); 1552 1553 spin_lock(&mdsc->stopping_lock); 1554 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING; 1555 wait = !!atomic_read(&mdsc->stopping_blockers); 1556 spin_unlock(&mdsc->stopping_lock); 1557 1558 if (wait && atomic_read(&mdsc->stopping_blockers)) { 1559 long timeleft = wait_for_completion_killable_timeout( 1560 &mdsc->stopping_waiter, 1561 fsc->client->options->mount_timeout); 1562 if (!timeleft) /* timed out */ 1563 pr_warn("umount timed out, %ld\n", timeleft); 1564 else if (timeleft < 0) /* killed */ 1565 pr_warn("umount was killed, %ld\n", timeleft); 1566 } 1567 1568 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; 1569 kill_anon_super(s); 1570 1571 fsc->client->extra_mon_dispatch = NULL; 1572 ceph_fs_debugfs_cleanup(fsc); 1573 1574 ceph_fscache_unregister_fs(fsc); 1575 1576 destroy_fs_client(fsc); 1577 } 1578 1579 static struct file_system_type ceph_fs_type = { 1580 .owner = THIS_MODULE, 1581 .name = "ceph", 1582 .init_fs_context = ceph_init_fs_context, 1583 .kill_sb = ceph_kill_sb, 1584 .fs_flags = FS_RENAME_DOES_D_MOVE, 1585 }; 1586 MODULE_ALIAS_FS("ceph"); 1587 1588 int ceph_force_reconnect(struct super_block *sb) 1589 { 1590 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1591 int err = 0; 1592 1593 fsc->mount_state = CEPH_MOUNT_RECOVER; 1594 __ceph_umount_begin(fsc); 1595 1596 /* Make sure all page caches get invalidated. 1597 * see remove_session_caps_cb() */ 1598 flush_workqueue(fsc->inode_wq); 1599 1600 /* In case that we were blocklisted. 
This also reset 1601 * all mon/osd connections */ 1602 ceph_reset_client_addr(fsc->client); 1603 1604 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1605 1606 fsc->blocklisted = false; 1607 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1608 1609 if (sb->s_root) { 1610 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1611 CEPH_STAT_CAP_INODE, true); 1612 } 1613 return err; 1614 } 1615 1616 static int __init init_ceph(void) 1617 { 1618 int ret = init_caches(); 1619 if (ret) 1620 goto out; 1621 1622 ceph_flock_init(); 1623 ret = register_filesystem(&ceph_fs_type); 1624 if (ret) 1625 goto out_caches; 1626 1627 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1628 1629 return 0; 1630 1631 out_caches: 1632 destroy_caches(); 1633 out: 1634 return ret; 1635 } 1636 1637 static void __exit exit_ceph(void) 1638 { 1639 dout("exit_ceph\n"); 1640 unregister_filesystem(&ceph_fs_type); 1641 destroy_caches(); 1642 } 1643 1644 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1645 { 1646 struct ceph_fs_client *fsc; 1647 int ret; 1648 1649 ret = param_set_bool(val, kp); 1650 if (ret) { 1651 pr_err("Failed to parse sending metrics switch value '%s'\n", 1652 val); 1653 return ret; 1654 } else if (!disable_send_metrics) { 1655 // wake up all the mds clients 1656 spin_lock(&ceph_fsc_lock); 1657 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1658 metric_schedule_delayed(&fsc->mdsc->metric); 1659 } 1660 spin_unlock(&ceph_fsc_lock); 1661 } 1662 1663 return 0; 1664 } 1665 1666 static const struct kernel_param_ops param_ops_metrics = { 1667 .set = param_set_metrics, 1668 .get = param_get_bool, 1669 }; 1670 1671 bool disable_send_metrics = false; 1672 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1673 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1674 1675 /* for both v1 and v2 syntax */ 1676 static bool mount_support = true; 1677 static const struct 
kernel_param_ops param_ops_mount_syntax = { 1678 .get = param_get_bool, 1679 }; 1680 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1681 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1682 1683 module_init(init_ceph); 1684 module_exit(exit_ceph); 1685 1686 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1687 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1688 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1689 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1690 MODULE_LICENSE("GPL"); 1691