1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 #include "crypto.h" 24 25 #include <linux/ceph/ceph_features.h> 26 #include <linux/ceph/decode.h> 27 #include <linux/ceph/mon_client.h> 28 #include <linux/ceph/auth.h> 29 #include <linux/ceph/debugfs.h> 30 31 #include <uapi/linux/magic.h> 32 33 static DEFINE_SPINLOCK(ceph_fsc_lock); 34 static LIST_HEAD(ceph_fsc_list); 35 36 /* 37 * Ceph superblock operations 38 * 39 * Handle the basics of mounting, unmounting. 40 */ 41 42 /* 43 * super ops 44 */ 45 static void ceph_put_super(struct super_block *s) 46 { 47 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 48 49 dout("put_super\n"); 50 ceph_fscrypt_free_dummy_policy(fsc); 51 ceph_mdsc_close_sessions(fsc->mdsc); 52 } 53 54 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 55 { 56 struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); 57 struct ceph_mon_client *monc = &fsc->client->monc; 58 struct ceph_statfs st; 59 int i, err; 60 u64 data_pool; 61 62 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 63 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 64 } else { 65 data_pool = CEPH_NOPOOL; 66 } 67 68 dout("statfs\n"); 69 err = ceph_monc_do_statfs(monc, data_pool, &st); 70 if (err < 0) 71 return err; 72 73 /* fill in kstatfs */ 74 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 75 76 /* 77 * Express utilization in terms of large blocks to avoid 78 * overflow on 32-bit machines. 79 */ 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 /* 95 * NOTE: for the time being, we make bsize == frsize to humor 96 * not-yet-ancient versions of glibc that are broken. 97 * Someday, we will probably want to report a real block 98 * size... whatever that may mean for a network file system! 99 */ 100 buf->f_bsize = buf->f_frsize; 101 102 buf->f_files = le64_to_cpu(st.num_objects); 103 buf->f_ffree = -1; 104 buf->f_namelen = NAME_MAX; 105 106 /* Must convert the fsid, for consistent values across arches */ 107 buf->f_fsid.val[0] = 0; 108 mutex_lock(&monc->mutex); 109 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 110 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 111 mutex_unlock(&monc->mutex); 112 113 /* fold the fs_cluster_id into the upper bits */ 114 buf->f_fsid.val[1] = monc->fs_cluster_id; 115 116 return 0; 117 } 118 119 static int ceph_sync_fs(struct super_block *sb, int wait) 120 { 121 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 122 123 if (!wait) { 124 dout("sync_fs (non-blocking)\n"); 125 ceph_flush_dirty_caps(fsc->mdsc); 126 dout("sync_fs (non-blocking) done\n"); 127 return 0; 128 } 129 130 dout("sync_fs (blocking)\n"); 131 ceph_osdc_sync(&fsc->client->osdc); 132 ceph_mdsc_sync(fsc->mdsc); 133 dout("sync_fs (blocking) done\n"); 134 return 0; 135 } 136 137 /* 138 * mount options 139 */ 140 enum { 141 Opt_wsize, 142 Opt_rsize, 143 Opt_rasize, 144 Opt_caps_wanted_delay_min, 145 Opt_caps_wanted_delay_max, 146 Opt_caps_max, 147 Opt_readdir_max_entries, 148 Opt_readdir_max_bytes, 149 Opt_congestion_kb, 150 /* int args above */ 151 Opt_snapdirname, 152 Opt_mds_namespace, 153 Opt_recover_session, 154 Opt_source, 155 Opt_mon_addr, 156 Opt_test_dummy_encryption, 157 /* string args above */ 158 Opt_dirstat, 159 Opt_rbytes, 160 Opt_asyncreaddir, 161 Opt_dcache, 162 Opt_ino32, 163 Opt_fscache, 164 Opt_poolperm, 165 Opt_require_active_mds, 166 Opt_acl, 167 Opt_quotadf, 168 Opt_copyfrom, 169 Opt_wsync, 170 Opt_pagecache, 171 Opt_sparseread, 172 }; 173 174 enum ceph_recover_session_mode { 175 ceph_recover_session_no, 176 ceph_recover_session_clean 177 }; 178 179 static const struct constant_table ceph_param_recover[] = { 180 { "no", ceph_recover_session_no }, 181 { "clean", ceph_recover_session_clean }, 182 {} 183 }; 184 185 static const struct fs_parameter_spec ceph_mount_parameters[] = { 186 fsparam_flag_no ("acl", Opt_acl), 187 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 188 fsparam_s32 ("caps_max", Opt_caps_max), 189 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 190 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 191 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 192 fsparam_flag_no ("copyfrom", Opt_copyfrom), 193 fsparam_flag_no ("dcache", Opt_dcache), 194 fsparam_flag_no ("dirstat", Opt_dirstat), 195 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 196 fsparam_string ("fsc", Opt_fscache), // fsc=... 197 fsparam_flag_no ("ino32", Opt_ino32), 198 fsparam_string ("mds_namespace", Opt_mds_namespace), 199 fsparam_string ("mon_addr", Opt_mon_addr), 200 fsparam_flag_no ("poolperm", Opt_poolperm), 201 fsparam_flag_no ("quotadf", Opt_quotadf), 202 fsparam_u32 ("rasize", Opt_rasize), 203 fsparam_flag_no ("rbytes", Opt_rbytes), 204 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 205 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 206 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 207 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 208 fsparam_u32 ("rsize", Opt_rsize), 209 fsparam_string ("snapdirname", Opt_snapdirname), 210 fsparam_string ("source", Opt_source), 211 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption), 212 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption), 213 fsparam_u32 ("wsize", Opt_wsize), 214 fsparam_flag_no ("wsync", Opt_wsync), 215 fsparam_flag_no ("pagecache", Opt_pagecache), 216 fsparam_flag_no ("sparseread", Opt_sparseread), 217 {} 218 }; 219 220 struct ceph_parse_opts_ctx { 221 struct ceph_options *copts; 222 struct ceph_mount_options *opts; 223 }; 224 225 /* 226 * Remove adjacent slashes and then the trailing slash, unless it is 227 * the only remaining character. 228 * 229 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 230 */ 231 static void canonicalize_path(char *path) 232 { 233 int i, j = 0; 234 235 for (i = 0; path[i] != '\0'; i++) { 236 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 237 path[j++] = path[i]; 238 } 239 240 if (j > 1 && path[j - 1] == '/') 241 j--; 242 path[j] = '\0'; 243 } 244 245 /* 246 * Check if the mds namespace in ceph_mount_options matches 247 * the passed in namespace string. First time match (when 248 * ->mds_namespace is NULL) is treated specially, since 249 * ->mds_namespace needs to be initialized by the caller. 250 */ 251 static int namespace_equals(struct ceph_mount_options *fsopt, 252 const char *namespace, size_t len) 253 { 254 return !(fsopt->mds_namespace && 255 (strlen(fsopt->mds_namespace) != len || 256 strncmp(fsopt->mds_namespace, namespace, len))); 257 } 258 259 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 260 struct fs_context *fc) 261 { 262 int r; 263 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 264 struct ceph_mount_options *fsopt = pctx->opts; 265 266 if (*dev_name_end != ':') 267 return invalfc(fc, "separator ':' missing in source"); 268 269 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 270 pctx->copts, fc->log.log, ','); 271 if (r) 272 return r; 273 274 fsopt->new_dev_syntax = false; 275 return 0; 276 } 277 278 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 279 struct fs_context *fc) 280 { 281 size_t len; 282 struct ceph_fsid fsid; 283 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 284 struct ceph_mount_options *fsopt = pctx->opts; 285 char *fsid_start, *fs_name_start; 286 287 if (*dev_name_end != '=') { 288 dout("separator '=' missing in source"); 289 return -EINVAL; 290 } 291 292 fsid_start = strchr(dev_name, '@'); 293 if (!fsid_start) 294 return invalfc(fc, "missing cluster fsid"); 295 ++fsid_start; /* start of cluster fsid */ 296 297 fs_name_start = strchr(fsid_start, '.'); 298 if (!fs_name_start) 299 return invalfc(fc, "missing file system name"); 300 301 if (ceph_parse_fsid(fsid_start, &fsid)) 302 return invalfc(fc, "Invalid FSID"); 303 304 ++fs_name_start; /* start of file system name */ 305 len = dev_name_end - fs_name_start; 306 307 if (!namespace_equals(fsopt, fs_name_start, len)) 308 return invalfc(fc, "Mismatching mds_namespace"); 309 kfree(fsopt->mds_namespace); 310 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 311 if (!fsopt->mds_namespace) 312 return -ENOMEM; 313 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 314 315 fsopt->new_dev_syntax = true; 316 return 0; 317 } 318 319 /* 320 * Parse the source parameter for new device format. Distinguish the device 321 * spec from the path. Try parsing new device format and fallback to old 322 * format if needed. 323 * 324 * New device syntax will looks like: 325 * <device_spec>=/<path> 326 * where 327 * <device_spec> is name@fsid.fsname 328 * <path> is optional, but if present must begin with '/' 329 * (monitor addresses are passed via mount option) 330 * 331 * Old device syntax is: 332 * <server_spec>[,<server_spec>...]:[<path>] 333 * where 334 * <server_spec> is <ip>[:<port>] 335 * <path> is optional, but if present must begin with '/' 336 */ 337 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 338 { 339 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 340 struct ceph_mount_options *fsopt = pctx->opts; 341 char *dev_name = param->string, *dev_name_end; 342 int ret; 343 344 dout("%s '%s'\n", __func__, dev_name); 345 if (!dev_name || !*dev_name) 346 return invalfc(fc, "Empty source"); 347 348 dev_name_end = strchr(dev_name, '/'); 349 if (dev_name_end) { 350 /* 351 * The server_path will include the whole chars from userland 352 * including the leading '/'. 353 */ 354 kfree(fsopt->server_path); 355 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 356 if (!fsopt->server_path) 357 return -ENOMEM; 358 359 canonicalize_path(fsopt->server_path); 360 } else { 361 dev_name_end = dev_name + strlen(dev_name); 362 } 363 364 dev_name_end--; /* back up to separator */ 365 if (dev_name_end < dev_name) 366 return invalfc(fc, "Path missing in source"); 367 368 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 369 if (fsopt->server_path) 370 dout("server path '%s'\n", fsopt->server_path); 371 372 dout("trying new device syntax"); 373 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 374 if (ret) { 375 if (ret != -EINVAL) 376 return ret; 377 dout("trying old device syntax"); 378 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 379 if (ret) 380 return ret; 381 } 382 383 fc->source = param->string; 384 param->string = NULL; 385 return 0; 386 } 387 388 static int ceph_parse_mon_addr(struct fs_parameter *param, 389 struct fs_context *fc) 390 { 391 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 392 struct ceph_mount_options *fsopt = pctx->opts; 393 394 kfree(fsopt->mon_addr); 395 fsopt->mon_addr = param->string; 396 param->string = NULL; 397 398 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 399 pctx->copts, fc->log.log, '/'); 400 } 401 402 static int ceph_parse_mount_param(struct fs_context *fc, 403 struct fs_parameter *param) 404 { 405 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 406 struct ceph_mount_options *fsopt = pctx->opts; 407 struct fs_parse_result result; 408 unsigned int mode; 409 int token, ret; 410 411 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 412 if (ret != -ENOPARAM) 413 return ret; 414 415 token = fs_parse(fc, ceph_mount_parameters, param, &result); 416 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 417 if (token < 0) 418 return token; 419 420 switch (token) { 421 case Opt_snapdirname: 422 kfree(fsopt->snapdir_name); 423 fsopt->snapdir_name = param->string; 424 param->string = NULL; 425 break; 426 case Opt_mds_namespace: 427 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 428 return invalfc(fc, "Mismatching mds_namespace"); 429 kfree(fsopt->mds_namespace); 430 fsopt->mds_namespace = param->string; 431 param->string = NULL; 432 break; 433 case Opt_recover_session: 434 mode = result.uint_32; 435 if (mode == ceph_recover_session_no) 436 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 437 else if (mode == ceph_recover_session_clean) 438 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 439 else 440 BUG(); 441 break; 442 case Opt_source: 443 if (fc->source) 444 return invalfc(fc, "Multiple sources specified"); 445 return ceph_parse_source(param, fc); 446 case Opt_mon_addr: 447 return ceph_parse_mon_addr(param, fc); 448 case Opt_wsize: 449 if (result.uint_32 < PAGE_SIZE || 450 result.uint_32 > CEPH_MAX_WRITE_SIZE) 451 goto out_of_range; 452 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_rsize: 455 if (result.uint_32 < PAGE_SIZE || 456 result.uint_32 > CEPH_MAX_READ_SIZE) 457 goto out_of_range; 458 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 459 break; 460 case Opt_rasize: 461 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 462 break; 463 case Opt_caps_wanted_delay_min: 464 if (result.uint_32 < 1) 465 goto out_of_range; 466 fsopt->caps_wanted_delay_min = result.uint_32; 467 break; 468 case Opt_caps_wanted_delay_max: 469 if (result.uint_32 < 1) 470 goto out_of_range; 471 fsopt->caps_wanted_delay_max = result.uint_32; 472 break; 473 case Opt_caps_max: 474 if (result.int_32 < 0) 475 goto out_of_range; 476 fsopt->caps_max = result.int_32; 477 break; 478 case Opt_readdir_max_entries: 479 if (result.uint_32 < 1) 480 goto out_of_range; 481 fsopt->max_readdir = result.uint_32; 482 break; 483 case Opt_readdir_max_bytes: 484 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 485 goto out_of_range; 486 fsopt->max_readdir_bytes = result.uint_32; 487 break; 488 case Opt_congestion_kb: 489 if (result.uint_32 < 1024) /* at least 1M */ 490 goto out_of_range; 491 fsopt->congestion_kb = result.uint_32; 492 break; 493 case Opt_dirstat: 494 if (!result.negated) 495 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 496 else 497 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 498 break; 499 case Opt_rbytes: 500 if (!result.negated) 501 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 502 else 503 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 504 break; 505 case Opt_asyncreaddir: 506 if (!result.negated) 507 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 508 else 509 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 510 break; 511 case Opt_dcache: 512 if (!result.negated) 513 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 514 else 515 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 516 break; 517 case Opt_ino32: 518 if (!result.negated) 519 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 520 else 521 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 522 break; 523 524 case Opt_fscache: 525 #ifdef CONFIG_CEPH_FSCACHE 526 kfree(fsopt->fscache_uniq); 527 fsopt->fscache_uniq = NULL; 528 if (result.negated) { 529 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 530 } else { 531 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 532 fsopt->fscache_uniq = param->string; 533 param->string = NULL; 534 } 535 break; 536 #else 537 return invalfc(fc, "fscache support is disabled"); 538 #endif 539 case Opt_poolperm: 540 if (!result.negated) 541 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 542 else 543 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 544 break; 545 case Opt_require_active_mds: 546 if (!result.negated) 547 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 548 else 549 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 550 break; 551 case Opt_quotadf: 552 if (!result.negated) 553 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 554 else 555 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 556 break; 557 case Opt_copyfrom: 558 if (!result.negated) 559 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 560 else 561 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 562 break; 563 case Opt_acl: 564 if (!result.negated) { 565 #ifdef CONFIG_CEPH_FS_POSIX_ACL 566 fc->sb_flags |= SB_POSIXACL; 567 #else 568 return invalfc(fc, "POSIX ACL support is disabled"); 569 #endif 570 } else { 571 fc->sb_flags &= ~SB_POSIXACL; 572 } 573 break; 574 case Opt_wsync: 575 if (!result.negated) 576 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 577 else 578 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 579 break; 580 case Opt_pagecache: 581 if (result.negated) 582 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 583 else 584 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 585 break; 586 case Opt_sparseread: 587 if (result.negated) 588 fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD; 589 else 590 fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD; 591 break; 592 case Opt_test_dummy_encryption: 593 #ifdef CONFIG_FS_ENCRYPTION 594 fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy); 595 ret = fscrypt_parse_test_dummy_encryption(param, 596 &fsopt->dummy_enc_policy); 597 if (ret == -EINVAL) { 598 warnfc(fc, "Value of option \"%s\" is unrecognized", 599 param->key); 600 } else if (ret == -EEXIST) { 601 warnfc(fc, "Conflicting test_dummy_encryption options"); 602 ret = -EINVAL; 603 } 604 #else 605 warnfc(fc, 606 "FS encryption not supported: test_dummy_encryption mount option ignored"); 607 #endif 608 break; 609 default: 610 BUG(); 611 } 612 return 0; 613 614 out_of_range: 615 return invalfc(fc, "%s out of range", param->key); 616 } 617 618 static void destroy_mount_options(struct ceph_mount_options *args) 619 { 620 dout("destroy_mount_options %p\n", args); 621 if (!args) 622 return; 623 624 kfree(args->snapdir_name); 625 kfree(args->mds_namespace); 626 kfree(args->server_path); 627 kfree(args->fscache_uniq); 628 kfree(args->mon_addr); 629 fscrypt_free_dummy_policy(&args->dummy_enc_policy); 630 kfree(args); 631 } 632 633 static int strcmp_null(const char *s1, const char *s2) 634 { 635 if (!s1 && !s2) 636 return 0; 637 if (s1 && !s2) 638 return -1; 639 if (!s1 && s2) 640 return 1; 641 return strcmp(s1, s2); 642 } 643 644 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 645 struct ceph_options *new_opt, 646 struct ceph_fs_client *fsc) 647 { 648 struct ceph_mount_options *fsopt1 = new_fsopt; 649 struct ceph_mount_options *fsopt2 = fsc->mount_options; 650 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 651 int ret; 652 653 ret = memcmp(fsopt1, fsopt2, ofs); 654 if (ret) 655 return ret; 656 657 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 658 if (ret) 659 return ret; 660 661 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 662 if (ret) 663 return ret; 664 665 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 666 if (ret) 667 return ret; 668 669 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 670 if (ret) 671 return ret; 672 673 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 674 if (ret) 675 return ret; 676 677 return ceph_compare_options(new_opt, fsc->client); 678 } 679 680 /** 681 * ceph_show_options - Show mount options in /proc/mounts 682 * @m: seq_file to write to 683 * @root: root of that (sub)tree 684 */ 685 static int ceph_show_options(struct seq_file *m, struct dentry *root) 686 { 687 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); 688 struct ceph_mount_options *fsopt = fsc->mount_options; 689 size_t pos; 690 int ret; 691 692 /* a comma between MNT/MS and client options */ 693 seq_putc(m, ','); 694 pos = m->count; 695 696 ret = ceph_print_client_options(m, fsc->client, false); 697 if (ret) 698 return ret; 699 700 /* retract our comma if no client options */ 701 if (m->count == pos) 702 m->count--; 703 704 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 705 seq_puts(m, ",dirstat"); 706 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 707 seq_puts(m, ",rbytes"); 708 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 709 seq_puts(m, ",noasyncreaddir"); 710 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 711 seq_puts(m, ",nodcache"); 712 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 713 seq_puts(m, ",ino32"); 714 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 715 seq_show_option(m, "fsc", fsopt->fscache_uniq); 716 } 717 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 718 seq_puts(m, ",nopoolperm"); 719 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 720 seq_puts(m, ",noquotadf"); 721 722 #ifdef CONFIG_CEPH_FS_POSIX_ACL 723 if (root->d_sb->s_flags & SB_POSIXACL) 724 seq_puts(m, ",acl"); 725 else 726 seq_puts(m, ",noacl"); 727 #endif 728 729 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 730 seq_puts(m, ",copyfrom"); 731 732 /* dump mds_namespace when old device syntax is in use */ 733 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 734 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 735 736 if (fsopt->mon_addr) 737 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 738 739 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 740 seq_show_option(m, "recover_session", "clean"); 741 742 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 743 seq_puts(m, ",wsync"); 744 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 745 seq_puts(m, ",nopagecache"); 746 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 747 seq_puts(m, ",sparseread"); 748 749 fscrypt_show_test_dummy_encryption(m, ',', root->d_sb); 750 751 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 752 seq_printf(m, ",wsize=%u", fsopt->wsize); 753 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 754 seq_printf(m, ",rsize=%u", fsopt->rsize); 755 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 756 seq_printf(m, ",rasize=%u", fsopt->rasize); 757 if (fsopt->congestion_kb != default_congestion_kb()) 758 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 759 if (fsopt->caps_max) 760 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 761 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 762 seq_printf(m, ",caps_wanted_delay_min=%u", 763 fsopt->caps_wanted_delay_min); 764 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 765 seq_printf(m, ",caps_wanted_delay_max=%u", 766 fsopt->caps_wanted_delay_max); 767 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 768 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 769 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 770 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 771 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 772 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 773 774 return 0; 775 } 776 777 /* 778 * handle any mon messages the standard library doesn't understand. 779 * return error if we don't either. 780 */ 781 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 782 { 783 struct ceph_fs_client *fsc = client->private; 784 int type = le16_to_cpu(msg->hdr.type); 785 786 switch (type) { 787 case CEPH_MSG_MDS_MAP: 788 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 789 return 0; 790 case CEPH_MSG_FS_MAP_USER: 791 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 792 return 0; 793 default: 794 return -1; 795 } 796 } 797 798 /* 799 * create a new fs client 800 * 801 * Success or not, this function consumes @fsopt and @opt. 802 */ 803 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 804 struct ceph_options *opt) 805 { 806 struct ceph_fs_client *fsc; 807 int err; 808 809 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 810 if (!fsc) { 811 err = -ENOMEM; 812 goto fail; 813 } 814 815 fsc->client = ceph_create_client(opt, fsc); 816 if (IS_ERR(fsc->client)) { 817 err = PTR_ERR(fsc->client); 818 goto fail; 819 } 820 opt = NULL; /* fsc->client now owns this */ 821 822 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 823 ceph_set_opt(fsc->client, ABORT_ON_FULL); 824 825 if (!fsopt->mds_namespace) { 826 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 827 0, true); 828 } else { 829 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 830 0, false); 831 } 832 833 fsc->mount_options = fsopt; 834 835 fsc->sb = NULL; 836 fsc->mount_state = CEPH_MOUNT_MOUNTING; 837 fsc->filp_gen = 1; 838 fsc->have_copy_from2 = true; 839 840 atomic_long_set(&fsc->writeback_count, 0); 841 fsc->write_congested = false; 842 843 err = -ENOMEM; 844 /* 845 * The number of concurrent works can be high but they don't need 846 * to be processed in parallel, limit concurrency. 847 */ 848 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 849 if (!fsc->inode_wq) 850 goto fail_client; 851 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 852 if (!fsc->cap_wq) 853 goto fail_inode_wq; 854 855 hash_init(fsc->async_unlink_conflict); 856 spin_lock_init(&fsc->async_unlink_conflict_lock); 857 858 spin_lock(&ceph_fsc_lock); 859 list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 860 spin_unlock(&ceph_fsc_lock); 861 862 return fsc; 863 864 fail_inode_wq: 865 destroy_workqueue(fsc->inode_wq); 866 fail_client: 867 ceph_destroy_client(fsc->client); 868 fail: 869 kfree(fsc); 870 if (opt) 871 ceph_destroy_options(opt); 872 destroy_mount_options(fsopt); 873 return ERR_PTR(err); 874 } 875 876 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 877 { 878 flush_workqueue(fsc->inode_wq); 879 flush_workqueue(fsc->cap_wq); 880 } 881 882 static void destroy_fs_client(struct ceph_fs_client *fsc) 883 { 884 dout("destroy_fs_client %p\n", fsc); 885 886 spin_lock(&ceph_fsc_lock); 887 list_del(&fsc->metric_wakeup); 888 spin_unlock(&ceph_fsc_lock); 889 890 ceph_mdsc_destroy(fsc); 891 destroy_workqueue(fsc->inode_wq); 892 destroy_workqueue(fsc->cap_wq); 893 894 destroy_mount_options(fsc->mount_options); 895 896 ceph_destroy_client(fsc->client); 897 898 kfree(fsc); 899 dout("destroy_fs_client %p done\n", fsc); 900 } 901 902 /* 903 * caches 904 */ 905 struct kmem_cache *ceph_inode_cachep; 906 struct kmem_cache *ceph_cap_cachep; 907 struct kmem_cache *ceph_cap_snap_cachep; 908 struct kmem_cache *ceph_cap_flush_cachep; 909 struct kmem_cache *ceph_dentry_cachep; 910 struct kmem_cache *ceph_file_cachep; 911 struct kmem_cache *ceph_dir_file_cachep; 912 struct kmem_cache *ceph_mds_request_cachep; 913 mempool_t *ceph_wb_pagevec_pool; 914 915 static void ceph_inode_init_once(void *foo) 916 { 917 struct ceph_inode_info *ci = foo; 918 inode_init_once(&ci->netfs.inode); 919 } 920 921 static int __init init_caches(void) 922 { 923 int error = -ENOMEM; 924 925 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 926 sizeof(struct ceph_inode_info), 927 __alignof__(struct ceph_inode_info), 928 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 929 SLAB_ACCOUNT, ceph_inode_init_once); 930 if (!ceph_inode_cachep) 931 return -ENOMEM; 932 933 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 934 if (!ceph_cap_cachep) 935 goto bad_cap; 936 ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); 937 if (!ceph_cap_snap_cachep) 938 goto bad_cap_snap; 939 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 940 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 941 if (!ceph_cap_flush_cachep) 942 goto bad_cap_flush; 943 944 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 945 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 946 if (!ceph_dentry_cachep) 947 goto bad_dentry; 948 949 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 950 if (!ceph_file_cachep) 951 goto bad_file; 952 953 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 954 if (!ceph_dir_file_cachep) 955 goto bad_dir_file; 956 957 ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); 958 if (!ceph_mds_request_cachep) 959 goto bad_mds_req; 960 961 ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, 962 (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); 963 if (!ceph_wb_pagevec_pool) 964 goto bad_pagevec_pool; 965 966 return 0; 967 968 bad_pagevec_pool: 969 kmem_cache_destroy(ceph_mds_request_cachep); 970 bad_mds_req: 971 kmem_cache_destroy(ceph_dir_file_cachep); 972 bad_dir_file: 973 kmem_cache_destroy(ceph_file_cachep); 974 bad_file: 975 kmem_cache_destroy(ceph_dentry_cachep); 976 bad_dentry: 977 kmem_cache_destroy(ceph_cap_flush_cachep); 978 bad_cap_flush: 979 kmem_cache_destroy(ceph_cap_snap_cachep); 980 bad_cap_snap: 981 kmem_cache_destroy(ceph_cap_cachep); 982 bad_cap: 983 kmem_cache_destroy(ceph_inode_cachep); 984 return error; 985 } 986 987 static void destroy_caches(void) 988 { 989 /* 990 * Make sure all delayed rcu free inodes are flushed before we 991 * destroy cache. 992 */ 993 rcu_barrier(); 994 995 kmem_cache_destroy(ceph_inode_cachep); 996 kmem_cache_destroy(ceph_cap_cachep); 997 kmem_cache_destroy(ceph_cap_snap_cachep); 998 kmem_cache_destroy(ceph_cap_flush_cachep); 999 kmem_cache_destroy(ceph_dentry_cachep); 1000 kmem_cache_destroy(ceph_file_cachep); 1001 kmem_cache_destroy(ceph_dir_file_cachep); 1002 kmem_cache_destroy(ceph_mds_request_cachep); 1003 mempool_destroy(ceph_wb_pagevec_pool); 1004 } 1005 1006 static void __ceph_umount_begin(struct ceph_fs_client *fsc) 1007 { 1008 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 1009 ceph_mdsc_force_umount(fsc->mdsc); 1010 fsc->filp_gen++; // invalidate open files 1011 } 1012 1013 /* 1014 * ceph_umount_begin - initiate forced umount. Tear down the 1015 * mount, skipping steps that may hang while waiting for server(s). 1016 */ 1017 void ceph_umount_begin(struct super_block *sb) 1018 { 1019 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1020 1021 dout("ceph_umount_begin - starting forced umount\n"); 1022 if (!fsc) 1023 return; 1024 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 1025 __ceph_umount_begin(fsc); 1026 } 1027 1028 static const struct super_operations ceph_super_ops = { 1029 .alloc_inode = ceph_alloc_inode, 1030 .free_inode = ceph_free_inode, 1031 .write_inode = ceph_write_inode, 1032 .drop_inode = generic_delete_inode, 1033 .evict_inode = ceph_evict_inode, 1034 .sync_fs = ceph_sync_fs, 1035 .put_super = ceph_put_super, 1036 .show_options = ceph_show_options, 1037 .statfs = ceph_statfs, 1038 .umount_begin = ceph_umount_begin, 1039 }; 1040 1041 /* 1042 * Bootstrap mount by opening the root directory. Note the mount 1043 * @started time from caller, and time out if this takes too long. 1044 */ 1045 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 1046 const char *path, 1047 unsigned long started) 1048 { 1049 struct ceph_mds_client *mdsc = fsc->mdsc; 1050 struct ceph_mds_request *req = NULL; 1051 int err; 1052 struct dentry *root; 1053 1054 /* open dir */ 1055 dout("open_root_inode opening '%s'\n", path); 1056 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1057 if (IS_ERR(req)) 1058 return ERR_CAST(req); 1059 req->r_path1 = kstrdup(path, GFP_NOFS); 1060 if (!req->r_path1) { 1061 root = ERR_PTR(-ENOMEM); 1062 goto out; 1063 } 1064 1065 req->r_ino1.ino = CEPH_INO_ROOT; 1066 req->r_ino1.snap = CEPH_NOSNAP; 1067 req->r_started = started; 1068 req->r_timeout = fsc->client->options->mount_timeout; 1069 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1070 req->r_num_caps = 2; 1071 err = ceph_mdsc_do_request(mdsc, NULL, req); 1072 if (err == 0) { 1073 struct inode *inode = req->r_target_inode; 1074 req->r_target_inode = NULL; 1075 dout("open_root_inode success\n"); 1076 root = d_make_root(inode); 1077 if (!root) { 1078 root = ERR_PTR(-ENOMEM); 1079 goto out; 1080 } 1081 dout("open_root_inode success, root dentry is %p\n", root); 1082 } else { 1083 root = ERR_PTR(err); 1084 } 1085 out: 1086 ceph_mdsc_put_request(req); 1087 return root; 1088 } 1089 1090 #ifdef CONFIG_FS_ENCRYPTION 1091 static int ceph_apply_test_dummy_encryption(struct super_block *sb, 1092 struct fs_context *fc, 1093 struct ceph_mount_options *fsopt) 1094 { 1095 struct ceph_fs_client *fsc = sb->s_fs_info; 1096 1097 if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy)) 1098 return 0; 1099 1100 /* No changing encryption context on remount. */ 1101 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && 1102 !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { 1103 if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy, 1104 &fsc->fsc_dummy_enc_policy)) 1105 return 0; 1106 errorfc(fc, "Can't set test_dummy_encryption on remount"); 1107 return -EINVAL; 1108 } 1109 1110 /* Also make sure fsopt doesn't contain a conflicting value. */ 1111 if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { 1112 if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy, 1113 &fsc->fsc_dummy_enc_policy)) 1114 return 0; 1115 errorfc(fc, "Conflicting test_dummy_encryption options"); 1116 return -EINVAL; 1117 } 1118 1119 fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy; 1120 memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy)); 1121 1122 warnfc(fc, "test_dummy_encryption mode enabled"); 1123 return 0; 1124 } 1125 #else 1126 static int ceph_apply_test_dummy_encryption(struct super_block *sb, 1127 struct fs_context *fc, 1128 struct ceph_mount_options *fsopt) 1129 { 1130 return 0; 1131 } 1132 #endif 1133 1134 /* 1135 * mount: join the ceph cluster, and open root directory. 1136 */ 1137 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1138 struct fs_context *fc) 1139 { 1140 int err; 1141 unsigned long started = jiffies; /* note the start time */ 1142 struct dentry *root; 1143 1144 dout("mount start %p\n", fsc); 1145 mutex_lock(&fsc->client->mount_mutex); 1146 1147 if (!fsc->sb->s_root) { 1148 const char *path = fsc->mount_options->server_path ? 1149 fsc->mount_options->server_path + 1 : ""; 1150 1151 err = __ceph_open_session(fsc->client, started); 1152 if (err < 0) 1153 goto out; 1154 1155 /* setup fscache */ 1156 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1157 err = ceph_fscache_register_fs(fsc, fc); 1158 if (err < 0) 1159 goto out; 1160 } 1161 1162 err = ceph_apply_test_dummy_encryption(fsc->sb, fc, 1163 fsc->mount_options); 1164 if (err) 1165 goto out; 1166 1167 dout("mount opening path '%s'\n", path); 1168 1169 ceph_fs_debugfs_init(fsc); 1170 1171 root = open_root_dentry(fsc, path, started); 1172 if (IS_ERR(root)) { 1173 err = PTR_ERR(root); 1174 goto out; 1175 } 1176 fsc->sb->s_root = dget(root); 1177 } else { 1178 root = dget(fsc->sb->s_root); 1179 } 1180 1181 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1182 dout("mount success\n"); 1183 mutex_unlock(&fsc->client->mount_mutex); 1184 return root; 1185 1186 out: 1187 mutex_unlock(&fsc->client->mount_mutex); 1188 ceph_fscrypt_free_dummy_policy(fsc); 1189 return ERR_PTR(err); 1190 } 1191 1192 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1193 { 1194 struct ceph_fs_client *fsc = s->s_fs_info; 1195 int ret; 1196 1197 dout("set_super %p\n", s); 1198 1199 s->s_maxbytes = MAX_LFS_FILESIZE; 1200 1201 s->s_xattr = ceph_xattr_handlers; 1202 fsc->sb = s; 1203 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1204 1205 s->s_op = &ceph_super_ops; 1206 s->s_d_op = &ceph_dentry_ops; 1207 s->s_export_op = &ceph_export_ops; 1208 1209 s->s_time_gran = 1; 1210 s->s_time_min = 0; 1211 s->s_time_max = U32_MAX; 1212 s->s_flags |= SB_NODIRATIME | SB_NOATIME; 1213 1214 ceph_fscrypt_set_ops(s); 1215 1216 ret = set_anon_super_fc(s, fc); 1217 if (ret != 0) 1218 fsc->sb = NULL; 1219 return ret; 1220 } 1221 1222 /* 1223 * share superblock if same fs AND options 1224 */ 1225 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1226 { 1227 struct ceph_fs_client *new = fc->s_fs_info; 1228 struct ceph_mount_options *fsopt = new->mount_options; 1229 struct ceph_options *opt = new->client->options; 1230 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1231 1232 dout("ceph_compare_super %p\n", sb); 1233 1234 if (compare_mount_options(fsopt, opt, fsc)) { 1235 dout("monitor(s)/mount options don't match\n"); 1236 return 0; 1237 } 1238 if ((opt->flags & CEPH_OPT_FSID) && 1239 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1240 dout("fsid doesn't match\n"); 1241 return 0; 1242 } 1243 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1244 dout("flags differ\n"); 1245 return 0; 1246 } 1247 1248 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1249 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1250 return 0; 1251 } 1252 1253 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1254 dout("client has been forcibly unmounted\n"); 1255 return 0; 1256 } 1257 1258 return 1; 1259 } 1260 1261 /* 1262 * construct our own bdi so we can control readahead, etc. 1263 */ 1264 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1265 1266 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1267 { 1268 int err; 1269 1270 err = super_setup_bdi_name(sb, "ceph-%ld", 1271 atomic_long_inc_return(&bdi_seq)); 1272 if (err) 1273 return err; 1274 1275 /* set ra_pages based on rasize mount option? */ 1276 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1277 1278 /* set io_pages based on max osd read size */ 1279 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1280 1281 return 0; 1282 } 1283 1284 static int ceph_get_tree(struct fs_context *fc) 1285 { 1286 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1287 struct ceph_mount_options *fsopt = pctx->opts; 1288 struct super_block *sb; 1289 struct ceph_fs_client *fsc; 1290 struct dentry *res; 1291 int (*compare_super)(struct super_block *, struct fs_context *) = 1292 ceph_compare_super; 1293 int err; 1294 1295 dout("ceph_get_tree\n"); 1296 1297 if (!fc->source) 1298 return invalfc(fc, "No source"); 1299 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1300 return invalfc(fc, "No monitor address"); 1301 1302 /* create client (which we may/may not use) */ 1303 fsc = create_fs_client(pctx->opts, pctx->copts); 1304 pctx->opts = NULL; 1305 pctx->copts = NULL; 1306 if (IS_ERR(fsc)) { 1307 err = PTR_ERR(fsc); 1308 goto out_final; 1309 } 1310 1311 err = ceph_mdsc_init(fsc); 1312 if (err < 0) 1313 goto out; 1314 1315 if (ceph_test_opt(fsc->client, NOSHARE)) 1316 compare_super = NULL; 1317 1318 fc->s_fs_info = fsc; 1319 sb = sget_fc(fc, compare_super, ceph_set_super); 1320 fc->s_fs_info = NULL; 1321 if (IS_ERR(sb)) { 1322 err = PTR_ERR(sb); 1323 goto out; 1324 } 1325 1326 if (ceph_sb_to_fs_client(sb) != fsc) { 1327 destroy_fs_client(fsc); 1328 fsc = ceph_sb_to_fs_client(sb); 1329 dout("get_sb got existing client %p\n", fsc); 1330 } else { 1331 dout("get_sb using new client %p\n", fsc); 1332 err = ceph_setup_bdi(sb, fsc); 1333 if (err < 0) 1334 goto out_splat; 1335 } 1336 1337 res = ceph_real_mount(fsc, fc); 1338 if (IS_ERR(res)) { 1339 err = PTR_ERR(res); 1340 goto out_splat; 1341 } 1342 dout("root %p inode %p ino %llx.%llx\n", res, 1343 d_inode(res), ceph_vinop(d_inode(res))); 1344 fc->root = fsc->sb->s_root; 1345 return 0; 1346 1347 out_splat: 1348 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1349 pr_info("No mds server is up or the cluster is laggy\n"); 1350 err = -EHOSTUNREACH; 1351 } 1352 1353 ceph_mdsc_close_sessions(fsc->mdsc); 1354 deactivate_locked_super(sb); 1355 goto out_final; 1356 1357 out: 1358 destroy_fs_client(fsc); 1359 out_final: 1360 dout("ceph_get_tree fail %d\n", err); 1361 return err; 1362 } 1363 1364 static void ceph_free_fc(struct fs_context *fc) 1365 { 1366 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1367 1368 if (pctx) { 1369 destroy_mount_options(pctx->opts); 1370 ceph_destroy_options(pctx->copts); 1371 kfree(pctx); 1372 } 1373 } 1374 1375 static int ceph_reconfigure_fc(struct fs_context *fc) 1376 { 1377 int err; 1378 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1379 struct ceph_mount_options *fsopt = pctx->opts; 1380 struct super_block *sb = fc->root->d_sb; 1381 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1382 1383 err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); 1384 if (err) 1385 return err; 1386 1387 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1388 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1389 else 1390 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1391 1392 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 1393 ceph_set_mount_opt(fsc, SPARSEREAD); 1394 else 1395 ceph_clear_mount_opt(fsc, SPARSEREAD); 1396 1397 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1398 kfree(fsc->mount_options->mon_addr); 1399 fsc->mount_options->mon_addr = fsopt->mon_addr; 1400 fsopt->mon_addr = NULL; 1401 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1402 } 1403 1404 sync_filesystem(sb); 1405 return 0; 1406 } 1407 1408 static const struct fs_context_operations ceph_context_ops = { 1409 .free = ceph_free_fc, 1410 .parse_param = ceph_parse_mount_param, 1411 .get_tree = ceph_get_tree, 1412 .reconfigure = ceph_reconfigure_fc, 1413 }; 1414 1415 /* 1416 * Set up the filesystem mount context. 1417 */ 1418 static int ceph_init_fs_context(struct fs_context *fc) 1419 { 1420 struct ceph_parse_opts_ctx *pctx; 1421 struct ceph_mount_options *fsopt; 1422 1423 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1424 if (!pctx) 1425 return -ENOMEM; 1426 1427 pctx->copts = ceph_alloc_options(); 1428 if (!pctx->copts) 1429 goto nomem; 1430 1431 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1432 if (!pctx->opts) 1433 goto nomem; 1434 1435 fsopt = pctx->opts; 1436 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1437 1438 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1439 fsopt->rsize = CEPH_MAX_READ_SIZE; 1440 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1441 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1442 if (!fsopt->snapdir_name) 1443 goto nomem; 1444 1445 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1446 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1447 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1448 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1449 fsopt->congestion_kb = default_congestion_kb(); 1450 1451 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1452 fc->sb_flags |= SB_POSIXACL; 1453 #endif 1454 1455 fc->fs_private = pctx; 1456 fc->ops = &ceph_context_ops; 1457 return 0; 1458 1459 nomem: 1460 destroy_mount_options(pctx->opts); 1461 ceph_destroy_options(pctx->copts); 1462 kfree(pctx); 1463 return -ENOMEM; 1464 } 1465 1466 /* 1467 * Return true if it successfully increases the blocker counter, 1468 * or false if the mdsc is in stopping and flushed state. 1469 */ 1470 static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc) 1471 { 1472 spin_lock(&mdsc->stopping_lock); 1473 if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) { 1474 spin_unlock(&mdsc->stopping_lock); 1475 return false; 1476 } 1477 atomic_inc(&mdsc->stopping_blockers); 1478 spin_unlock(&mdsc->stopping_lock); 1479 return true; 1480 } 1481 1482 static void __dec_stopping_blocker(struct ceph_mds_client *mdsc) 1483 { 1484 spin_lock(&mdsc->stopping_lock); 1485 if (!atomic_dec_return(&mdsc->stopping_blockers) && 1486 mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) 1487 complete_all(&mdsc->stopping_waiter); 1488 spin_unlock(&mdsc->stopping_lock); 1489 } 1490 1491 /* For metadata IO requests */ 1492 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc, 1493 struct ceph_mds_session *session) 1494 { 1495 mutex_lock(&session->s_mutex); 1496 inc_session_sequence(session); 1497 mutex_unlock(&session->s_mutex); 1498 1499 return __inc_stopping_blocker(mdsc); 1500 } 1501 1502 void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc) 1503 { 1504 __dec_stopping_blocker(mdsc); 1505 } 1506 1507 /* For data IO requests */ 1508 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1509 { 1510 return __inc_stopping_blocker(mdsc); 1511 } 1512 1513 void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1514 { 1515 __dec_stopping_blocker(mdsc); 1516 } 1517 1518 static void ceph_kill_sb(struct super_block *s) 1519 { 1520 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 1521 struct ceph_mds_client *mdsc = fsc->mdsc; 1522 bool wait; 1523 1524 dout("kill_sb %p\n", s); 1525 1526 ceph_mdsc_pre_umount(mdsc); 1527 flush_fs_workqueues(fsc); 1528 1529 /* 1530 * Though the kill_anon_super() will finally trigger the 1531 * sync_filesystem() anyway, we still need to do it here and 1532 * then bump the stage of shutdown. This will allow us to 1533 * drop any further message, which will increase the inodes' 1534 * i_count reference counters but makes no sense any more, 1535 * from MDSs. 1536 * 1537 * Without this when evicting the inodes it may fail in the 1538 * kill_anon_super(), which will trigger a warning when 1539 * destroying the fscrypt keyring and then possibly trigger 1540 * a further crash in ceph module when the iput() tries to 1541 * evict the inodes later. 1542 */ 1543 sync_filesystem(s); 1544 1545 spin_lock(&mdsc->stopping_lock); 1546 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING; 1547 wait = !!atomic_read(&mdsc->stopping_blockers); 1548 spin_unlock(&mdsc->stopping_lock); 1549 1550 if (wait && atomic_read(&mdsc->stopping_blockers)) { 1551 long timeleft = wait_for_completion_killable_timeout( 1552 &mdsc->stopping_waiter, 1553 fsc->client->options->mount_timeout); 1554 if (!timeleft) /* timed out */ 1555 pr_warn("umount timed out, %ld\n", timeleft); 1556 else if (timeleft < 0) /* killed */ 1557 pr_warn("umount was killed, %ld\n", timeleft); 1558 } 1559 1560 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; 1561 kill_anon_super(s); 1562 1563 fsc->client->extra_mon_dispatch = NULL; 1564 ceph_fs_debugfs_cleanup(fsc); 1565 1566 ceph_fscache_unregister_fs(fsc); 1567 1568 destroy_fs_client(fsc); 1569 } 1570 1571 static struct file_system_type ceph_fs_type = { 1572 .owner = THIS_MODULE, 1573 .name = "ceph", 1574 .init_fs_context = ceph_init_fs_context, 1575 .kill_sb = ceph_kill_sb, 1576 .fs_flags = FS_RENAME_DOES_D_MOVE, 1577 }; 1578 MODULE_ALIAS_FS("ceph"); 1579 1580 int ceph_force_reconnect(struct super_block *sb) 1581 { 1582 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1583 int err = 0; 1584 1585 fsc->mount_state = CEPH_MOUNT_RECOVER; 1586 __ceph_umount_begin(fsc); 1587 1588 /* Make sure all page caches get invalidated. 1589 * see remove_session_caps_cb() */ 1590 flush_workqueue(fsc->inode_wq); 1591 1592 /* In case that we were blocklisted. This also reset 1593 * all mon/osd connections */ 1594 ceph_reset_client_addr(fsc->client); 1595 1596 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1597 1598 fsc->blocklisted = false; 1599 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1600 1601 if (sb->s_root) { 1602 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1603 CEPH_STAT_CAP_INODE, true); 1604 } 1605 return err; 1606 } 1607 1608 static int __init init_ceph(void) 1609 { 1610 int ret = init_caches(); 1611 if (ret) 1612 goto out; 1613 1614 ceph_flock_init(); 1615 ret = register_filesystem(&ceph_fs_type); 1616 if (ret) 1617 goto out_caches; 1618 1619 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1620 1621 return 0; 1622 1623 out_caches: 1624 destroy_caches(); 1625 out: 1626 return ret; 1627 } 1628 1629 static void __exit exit_ceph(void) 1630 { 1631 dout("exit_ceph\n"); 1632 unregister_filesystem(&ceph_fs_type); 1633 destroy_caches(); 1634 } 1635 1636 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1637 { 1638 struct ceph_fs_client *fsc; 1639 int ret; 1640 1641 ret = param_set_bool(val, kp); 1642 if (ret) { 1643 pr_err("Failed to parse sending metrics switch value '%s'\n", 1644 val); 1645 return ret; 1646 } else if (!disable_send_metrics) { 1647 // wake up all the mds clients 1648 spin_lock(&ceph_fsc_lock); 1649 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1650 metric_schedule_delayed(&fsc->mdsc->metric); 1651 } 1652 spin_unlock(&ceph_fsc_lock); 1653 } 1654 1655 return 0; 1656 } 1657 1658 static const struct kernel_param_ops param_ops_metrics = { 1659 .set = param_set_metrics, 1660 .get = param_get_bool, 1661 }; 1662 1663 bool disable_send_metrics = false; 1664 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1665 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1666 1667 /* for both v1 and v2 syntax */ 1668 static bool mount_support = true; 1669 static const struct kernel_param_ops param_ops_mount_syntax = { 1670 .get = param_get_bool, 1671 }; 1672 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1673 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1674 1675 module_init(init_ceph); 1676 module_exit(exit_ceph); 1677 1678 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1679 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1680 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1681 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1682 MODULE_LICENSE("GPL"); 1683