1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 #include "crypto.h" 24 25 #include <linux/ceph/ceph_features.h> 26 #include <linux/ceph/decode.h> 27 #include <linux/ceph/mon_client.h> 28 #include <linux/ceph/auth.h> 29 #include <linux/ceph/debugfs.h> 30 31 #include <uapi/linux/magic.h> 32 33 static DEFINE_SPINLOCK(ceph_fsc_lock); 34 static LIST_HEAD(ceph_fsc_list); 35 36 /* 37 * Ceph superblock operations 38 * 39 * Handle the basics of mounting, unmounting. 40 */ 41 42 /* 43 * super ops 44 */ 45 static void ceph_put_super(struct super_block *s) 46 { 47 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 48 49 dout("put_super\n"); 50 ceph_fscrypt_free_dummy_policy(fsc); 51 ceph_mdsc_close_sessions(fsc->mdsc); 52 } 53 54 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 55 { 56 struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); 57 struct ceph_mon_client *monc = &fsc->client->monc; 58 struct ceph_statfs st; 59 int i, err; 60 u64 data_pool; 61 62 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 63 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 64 } else { 65 data_pool = CEPH_NOPOOL; 66 } 67 68 dout("statfs\n"); 69 err = ceph_monc_do_statfs(monc, data_pool, &st); 70 if (err < 0) 71 return err; 72 73 /* fill in kstatfs */ 74 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 75 76 /* 77 * Express utilization in terms of large blocks to avoid 78 * overflow on 32-bit machines. 79 */ 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 /* 95 * NOTE: for the time being, we make bsize == frsize to humor 96 * not-yet-ancient versions of glibc that are broken. 97 * Someday, we will probably want to report a real block 98 * size... whatever that may mean for a network file system! 99 */ 100 buf->f_bsize = buf->f_frsize; 101 102 buf->f_files = le64_to_cpu(st.num_objects); 103 buf->f_ffree = -1; 104 buf->f_namelen = NAME_MAX; 105 106 /* Must convert the fsid, for consistent values across arches */ 107 buf->f_fsid.val[0] = 0; 108 mutex_lock(&monc->mutex); 109 for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) 110 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); 111 mutex_unlock(&monc->mutex); 112 113 /* fold the fs_cluster_id into the upper bits */ 114 buf->f_fsid.val[1] = monc->fs_cluster_id; 115 116 return 0; 117 } 118 119 static int ceph_sync_fs(struct super_block *sb, int wait) 120 { 121 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 122 123 if (!wait) { 124 dout("sync_fs (non-blocking)\n"); 125 ceph_flush_dirty_caps(fsc->mdsc); 126 dout("sync_fs (non-blocking) done\n"); 127 return 0; 128 } 129 130 dout("sync_fs (blocking)\n"); 131 ceph_osdc_sync(&fsc->client->osdc); 132 ceph_mdsc_sync(fsc->mdsc); 133 dout("sync_fs (blocking) done\n"); 134 return 0; 135 } 136 137 /* 138 * mount options 139 */ 140 enum { 141 Opt_wsize, 142 Opt_rsize, 143 Opt_rasize, 144 Opt_caps_wanted_delay_min, 145 Opt_caps_wanted_delay_max, 146 Opt_caps_max, 147 Opt_readdir_max_entries, 148 Opt_readdir_max_bytes, 149 Opt_congestion_kb, 150 /* int args above */ 151 Opt_snapdirname, 152 Opt_mds_namespace, 153 Opt_recover_session, 154 Opt_source, 155 Opt_mon_addr, 156 Opt_test_dummy_encryption, 157 /* string args above */ 158 Opt_dirstat, 159 Opt_rbytes, 160 Opt_asyncreaddir, 161 Opt_dcache, 162 Opt_ino32, 163 Opt_fscache, 164 Opt_poolperm, 165 Opt_require_active_mds, 166 Opt_acl, 167 Opt_quotadf, 168 Opt_copyfrom, 169 Opt_wsync, 170 Opt_pagecache, 171 Opt_sparseread, 172 }; 173 174 enum ceph_recover_session_mode { 175 ceph_recover_session_no, 176 ceph_recover_session_clean 177 }; 178 179 static const struct constant_table ceph_param_recover[] = { 180 { "no", ceph_recover_session_no }, 181 { "clean", ceph_recover_session_clean }, 182 {} 183 }; 184 185 static const struct fs_parameter_spec ceph_mount_parameters[] = { 186 fsparam_flag_no ("acl", Opt_acl), 187 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 188 fsparam_s32 ("caps_max", Opt_caps_max), 189 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 190 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 191 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 192 fsparam_flag_no ("copyfrom", Opt_copyfrom), 193 fsparam_flag_no ("dcache", Opt_dcache), 194 fsparam_flag_no ("dirstat", Opt_dirstat), 195 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 196 fsparam_string ("fsc", Opt_fscache), // fsc=... 197 fsparam_flag_no ("ino32", Opt_ino32), 198 fsparam_string ("mds_namespace", Opt_mds_namespace), 199 fsparam_string ("mon_addr", Opt_mon_addr), 200 fsparam_flag_no ("poolperm", Opt_poolperm), 201 fsparam_flag_no ("quotadf", Opt_quotadf), 202 fsparam_u32 ("rasize", Opt_rasize), 203 fsparam_flag_no ("rbytes", Opt_rbytes), 204 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 205 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 206 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 207 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 208 fsparam_u32 ("rsize", Opt_rsize), 209 fsparam_string ("snapdirname", Opt_snapdirname), 210 fsparam_string ("source", Opt_source), 211 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption), 212 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption), 213 fsparam_u32 ("wsize", Opt_wsize), 214 fsparam_flag_no ("wsync", Opt_wsync), 215 fsparam_flag_no ("pagecache", Opt_pagecache), 216 fsparam_flag_no ("sparseread", Opt_sparseread), 217 {} 218 }; 219 220 struct ceph_parse_opts_ctx { 221 struct ceph_options *copts; 222 struct ceph_mount_options *opts; 223 }; 224 225 /* 226 * Remove adjacent slashes and then the trailing slash, unless it is 227 * the only remaining character. 228 * 229 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 230 */ 231 static void canonicalize_path(char *path) 232 { 233 int i, j = 0; 234 235 for (i = 0; path[i] != '\0'; i++) { 236 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 237 path[j++] = path[i]; 238 } 239 240 if (j > 1 && path[j - 1] == '/') 241 j--; 242 path[j] = '\0'; 243 } 244 245 /* 246 * Check if the mds namespace in ceph_mount_options matches 247 * the passed in namespace string. First time match (when 248 * ->mds_namespace is NULL) is treated specially, since 249 * ->mds_namespace needs to be initialized by the caller. 250 */ 251 static int namespace_equals(struct ceph_mount_options *fsopt, 252 const char *namespace, size_t len) 253 { 254 return !(fsopt->mds_namespace && 255 (strlen(fsopt->mds_namespace) != len || 256 strncmp(fsopt->mds_namespace, namespace, len))); 257 } 258 259 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, 260 struct fs_context *fc) 261 { 262 int r; 263 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 264 struct ceph_mount_options *fsopt = pctx->opts; 265 266 if (*dev_name_end != ':') 267 return invalfc(fc, "separator ':' missing in source"); 268 269 r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name, 270 pctx->copts, fc->log.log, ','); 271 if (r) 272 return r; 273 274 fsopt->new_dev_syntax = false; 275 return 0; 276 } 277 278 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end, 279 struct fs_context *fc) 280 { 281 size_t len; 282 struct ceph_fsid fsid; 283 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 284 struct ceph_mount_options *fsopt = pctx->opts; 285 char *fsid_start, *fs_name_start; 286 287 if (*dev_name_end != '=') { 288 dout("separator '=' missing in source"); 289 return -EINVAL; 290 } 291 292 fsid_start = strchr(dev_name, '@'); 293 if (!fsid_start) 294 return invalfc(fc, "missing cluster fsid"); 295 ++fsid_start; /* start of cluster fsid */ 296 297 fs_name_start = strchr(fsid_start, '.'); 298 if (!fs_name_start) 299 return invalfc(fc, "missing file system name"); 300 301 if (ceph_parse_fsid(fsid_start, &fsid)) 302 return invalfc(fc, "Invalid FSID"); 303 304 ++fs_name_start; /* start of file system name */ 305 len = dev_name_end - fs_name_start; 306 307 if (!namespace_equals(fsopt, fs_name_start, len)) 308 return invalfc(fc, "Mismatching mds_namespace"); 309 kfree(fsopt->mds_namespace); 310 fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL); 311 if (!fsopt->mds_namespace) 312 return -ENOMEM; 313 dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace); 314 315 fsopt->new_dev_syntax = true; 316 return 0; 317 } 318 319 /* 320 * Parse the source parameter for new device format. Distinguish the device 321 * spec from the path. Try parsing new device format and fallback to old 322 * format if needed. 323 * 324 * New device syntax will looks like: 325 * <device_spec>=/<path> 326 * where 327 * <device_spec> is name@fsid.fsname 328 * <path> is optional, but if present must begin with '/' 329 * (monitor addresses are passed via mount option) 330 * 331 * Old device syntax is: 332 * <server_spec>[,<server_spec>...]:[<path>] 333 * where 334 * <server_spec> is <ip>[:<port>] 335 * <path> is optional, but if present must begin with '/' 336 */ 337 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 338 { 339 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 340 struct ceph_mount_options *fsopt = pctx->opts; 341 char *dev_name = param->string, *dev_name_end; 342 int ret; 343 344 dout("%s '%s'\n", __func__, dev_name); 345 if (!dev_name || !*dev_name) 346 return invalfc(fc, "Empty source"); 347 348 dev_name_end = strchr(dev_name, '/'); 349 if (dev_name_end) { 350 /* 351 * The server_path will include the whole chars from userland 352 * including the leading '/'. 353 */ 354 kfree(fsopt->server_path); 355 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 356 if (!fsopt->server_path) 357 return -ENOMEM; 358 359 canonicalize_path(fsopt->server_path); 360 } else { 361 dev_name_end = dev_name + strlen(dev_name); 362 } 363 364 dev_name_end--; /* back up to separator */ 365 if (dev_name_end < dev_name) 366 return invalfc(fc, "Path missing in source"); 367 368 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 369 if (fsopt->server_path) 370 dout("server path '%s'\n", fsopt->server_path); 371 372 dout("trying new device syntax"); 373 ret = ceph_parse_new_source(dev_name, dev_name_end, fc); 374 if (ret) { 375 if (ret != -EINVAL) 376 return ret; 377 dout("trying old device syntax"); 378 ret = ceph_parse_old_source(dev_name, dev_name_end, fc); 379 if (ret) 380 return ret; 381 } 382 383 fc->source = param->string; 384 param->string = NULL; 385 return 0; 386 } 387 388 static int ceph_parse_mon_addr(struct fs_parameter *param, 389 struct fs_context *fc) 390 { 391 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 392 struct ceph_mount_options *fsopt = pctx->opts; 393 394 kfree(fsopt->mon_addr); 395 fsopt->mon_addr = param->string; 396 param->string = NULL; 397 398 return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr), 399 pctx->copts, fc->log.log, '/'); 400 } 401 402 static int ceph_parse_mount_param(struct fs_context *fc, 403 struct fs_parameter *param) 404 { 405 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 406 struct ceph_mount_options *fsopt = pctx->opts; 407 struct fs_parse_result result; 408 unsigned int mode; 409 int token, ret; 410 411 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 412 if (ret != -ENOPARAM) 413 return ret; 414 415 token = fs_parse(fc, ceph_mount_parameters, param, &result); 416 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 417 if (token < 0) 418 return token; 419 420 switch (token) { 421 case Opt_snapdirname: 422 kfree(fsopt->snapdir_name); 423 fsopt->snapdir_name = param->string; 424 param->string = NULL; 425 break; 426 case Opt_mds_namespace: 427 if (!namespace_equals(fsopt, param->string, strlen(param->string))) 428 return invalfc(fc, "Mismatching mds_namespace"); 429 kfree(fsopt->mds_namespace); 430 fsopt->mds_namespace = param->string; 431 param->string = NULL; 432 break; 433 case Opt_recover_session: 434 mode = result.uint_32; 435 if (mode == ceph_recover_session_no) 436 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 437 else if (mode == ceph_recover_session_clean) 438 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 439 else 440 BUG(); 441 break; 442 case Opt_source: 443 if (fc->source) 444 return invalfc(fc, "Multiple sources specified"); 445 return ceph_parse_source(param, fc); 446 case Opt_mon_addr: 447 return ceph_parse_mon_addr(param, fc); 448 case Opt_wsize: 449 if (result.uint_32 < PAGE_SIZE || 450 result.uint_32 > CEPH_MAX_WRITE_SIZE) 451 goto out_of_range; 452 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 453 break; 454 case Opt_rsize: 455 if (result.uint_32 < PAGE_SIZE || 456 result.uint_32 > CEPH_MAX_READ_SIZE) 457 goto out_of_range; 458 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 459 break; 460 case Opt_rasize: 461 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 462 break; 463 case Opt_caps_wanted_delay_min: 464 if (result.uint_32 < 1) 465 goto out_of_range; 466 fsopt->caps_wanted_delay_min = result.uint_32; 467 break; 468 case Opt_caps_wanted_delay_max: 469 if (result.uint_32 < 1) 470 goto out_of_range; 471 fsopt->caps_wanted_delay_max = result.uint_32; 472 break; 473 case Opt_caps_max: 474 if (result.int_32 < 0) 475 goto out_of_range; 476 fsopt->caps_max = result.int_32; 477 break; 478 case Opt_readdir_max_entries: 479 if (result.uint_32 < 1) 480 goto out_of_range; 481 fsopt->max_readdir = result.uint_32; 482 break; 483 case Opt_readdir_max_bytes: 484 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 485 goto out_of_range; 486 fsopt->max_readdir_bytes = result.uint_32; 487 break; 488 case Opt_congestion_kb: 489 if (result.uint_32 < 1024) /* at least 1M */ 490 goto out_of_range; 491 fsopt->congestion_kb = result.uint_32; 492 break; 493 case Opt_dirstat: 494 if (!result.negated) 495 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 496 else 497 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 498 break; 499 case Opt_rbytes: 500 if (!result.negated) 501 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 502 else 503 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 504 break; 505 case Opt_asyncreaddir: 506 if (!result.negated) 507 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 508 else 509 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 510 break; 511 case Opt_dcache: 512 if (!result.negated) 513 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 514 else 515 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 516 break; 517 case Opt_ino32: 518 if (!result.negated) 519 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 520 else 521 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 522 break; 523 524 case Opt_fscache: 525 #ifdef CONFIG_CEPH_FSCACHE 526 kfree(fsopt->fscache_uniq); 527 fsopt->fscache_uniq = NULL; 528 if (result.negated) { 529 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 530 } else { 531 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 532 fsopt->fscache_uniq = param->string; 533 param->string = NULL; 534 } 535 break; 536 #else 537 return invalfc(fc, "fscache support is disabled"); 538 #endif 539 case Opt_poolperm: 540 if (!result.negated) 541 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 542 else 543 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 544 break; 545 case Opt_require_active_mds: 546 if (!result.negated) 547 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 548 else 549 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 550 break; 551 case Opt_quotadf: 552 if (!result.negated) 553 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 554 else 555 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 556 break; 557 case Opt_copyfrom: 558 if (!result.negated) 559 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 560 else 561 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 562 break; 563 case Opt_acl: 564 if (!result.negated) { 565 #ifdef CONFIG_CEPH_FS_POSIX_ACL 566 fc->sb_flags |= SB_POSIXACL; 567 #else 568 return invalfc(fc, "POSIX ACL support is disabled"); 569 #endif 570 } else { 571 fc->sb_flags &= ~SB_POSIXACL; 572 } 573 break; 574 case Opt_wsync: 575 if (!result.negated) 576 fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; 577 else 578 fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; 579 break; 580 case Opt_pagecache: 581 if (result.negated) 582 fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE; 583 else 584 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE; 585 break; 586 case Opt_sparseread: 587 if (result.negated) 588 fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD; 589 else 590 fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD; 591 break; 592 case Opt_test_dummy_encryption: 593 #ifdef CONFIG_FS_ENCRYPTION 594 fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy); 595 ret = fscrypt_parse_test_dummy_encryption(param, 596 &fsopt->dummy_enc_policy); 597 if (ret == -EINVAL) { 598 warnfc(fc, "Value of option \"%s\" is unrecognized", 599 param->key); 600 } else if (ret == -EEXIST) { 601 warnfc(fc, "Conflicting test_dummy_encryption options"); 602 ret = -EINVAL; 603 } 604 #else 605 warnfc(fc, 606 "FS encryption not supported: test_dummy_encryption mount option ignored"); 607 #endif 608 break; 609 default: 610 BUG(); 611 } 612 return 0; 613 614 out_of_range: 615 return invalfc(fc, "%s out of range", param->key); 616 } 617 618 static void destroy_mount_options(struct ceph_mount_options *args) 619 { 620 dout("destroy_mount_options %p\n", args); 621 if (!args) 622 return; 623 624 kfree(args->snapdir_name); 625 kfree(args->mds_namespace); 626 kfree(args->server_path); 627 kfree(args->fscache_uniq); 628 kfree(args->mon_addr); 629 fscrypt_free_dummy_policy(&args->dummy_enc_policy); 630 kfree(args); 631 } 632 633 static int strcmp_null(const char *s1, const char *s2) 634 { 635 if (!s1 && !s2) 636 return 0; 637 if (s1 && !s2) 638 return -1; 639 if (!s1 && s2) 640 return 1; 641 return strcmp(s1, s2); 642 } 643 644 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 645 struct ceph_options *new_opt, 646 struct ceph_fs_client *fsc) 647 { 648 struct ceph_mount_options *fsopt1 = new_fsopt; 649 struct ceph_mount_options *fsopt2 = fsc->mount_options; 650 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 651 int ret; 652 653 ret = memcmp(fsopt1, fsopt2, ofs); 654 if (ret) 655 return ret; 656 657 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 658 if (ret) 659 return ret; 660 661 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 662 if (ret) 663 return ret; 664 665 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 666 if (ret) 667 return ret; 668 669 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 670 if (ret) 671 return ret; 672 673 ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr); 674 if (ret) 675 return ret; 676 677 return ceph_compare_options(new_opt, fsc->client); 678 } 679 680 /** 681 * ceph_show_options - Show mount options in /proc/mounts 682 * @m: seq_file to write to 683 * @root: root of that (sub)tree 684 */ 685 static int ceph_show_options(struct seq_file *m, struct dentry *root) 686 { 687 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); 688 struct ceph_mount_options *fsopt = fsc->mount_options; 689 size_t pos; 690 int ret; 691 692 /* a comma between MNT/MS and client options */ 693 seq_putc(m, ','); 694 pos = m->count; 695 696 ret = ceph_print_client_options(m, fsc->client, false); 697 if (ret) 698 return ret; 699 700 /* retract our comma if no client options */ 701 if (m->count == pos) 702 m->count--; 703 704 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 705 seq_puts(m, ",dirstat"); 706 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 707 seq_puts(m, ",rbytes"); 708 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 709 seq_puts(m, ",noasyncreaddir"); 710 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 711 seq_puts(m, ",nodcache"); 712 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 713 seq_puts(m, ",ino32"); 714 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 715 seq_show_option(m, "fsc", fsopt->fscache_uniq); 716 } 717 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 718 seq_puts(m, ",nopoolperm"); 719 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 720 seq_puts(m, ",noquotadf"); 721 722 #ifdef CONFIG_CEPH_FS_POSIX_ACL 723 if (root->d_sb->s_flags & SB_POSIXACL) 724 seq_puts(m, ",acl"); 725 else 726 seq_puts(m, ",noacl"); 727 #endif 728 729 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 730 seq_puts(m, ",copyfrom"); 731 732 /* dump mds_namespace when old device syntax is in use */ 733 if (fsopt->mds_namespace && !fsopt->new_dev_syntax) 734 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 735 736 if (fsopt->mon_addr) 737 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr); 738 739 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 740 seq_show_option(m, "recover_session", "clean"); 741 742 if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) 743 seq_puts(m, ",wsync"); 744 if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE) 745 seq_puts(m, ",nopagecache"); 746 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 747 seq_puts(m, ",sparseread"); 748 749 fscrypt_show_test_dummy_encryption(m, ',', root->d_sb); 750 751 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 752 seq_printf(m, ",wsize=%u", fsopt->wsize); 753 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 754 seq_printf(m, ",rsize=%u", fsopt->rsize); 755 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 756 seq_printf(m, ",rasize=%u", fsopt->rasize); 757 if (fsopt->congestion_kb != default_congestion_kb()) 758 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 759 if (fsopt->caps_max) 760 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 761 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 762 seq_printf(m, ",caps_wanted_delay_min=%u", 763 fsopt->caps_wanted_delay_min); 764 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 765 seq_printf(m, ",caps_wanted_delay_max=%u", 766 fsopt->caps_wanted_delay_max); 767 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 768 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 769 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 770 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 771 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 772 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 773 774 return 0; 775 } 776 777 /* 778 * handle any mon messages the standard library doesn't understand. 779 * return error if we don't either. 780 */ 781 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 782 { 783 struct ceph_fs_client *fsc = client->private; 784 int type = le16_to_cpu(msg->hdr.type); 785 786 switch (type) { 787 case CEPH_MSG_MDS_MAP: 788 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 789 return 0; 790 case CEPH_MSG_FS_MAP_USER: 791 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 792 return 0; 793 default: 794 return -1; 795 } 796 } 797 798 /* 799 * create a new fs client 800 * 801 * Success or not, this function consumes @fsopt and @opt. 802 */ 803 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 804 struct ceph_options *opt) 805 { 806 struct ceph_fs_client *fsc; 807 int err; 808 809 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 810 if (!fsc) { 811 err = -ENOMEM; 812 goto fail; 813 } 814 815 fsc->client = ceph_create_client(opt, fsc); 816 if (IS_ERR(fsc->client)) { 817 err = PTR_ERR(fsc->client); 818 goto fail; 819 } 820 opt = NULL; /* fsc->client now owns this */ 821 822 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 823 ceph_set_opt(fsc->client, ABORT_ON_FULL); 824 825 if (!fsopt->mds_namespace) { 826 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 827 0, true); 828 } else { 829 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 830 0, false); 831 } 832 833 fsc->mount_options = fsopt; 834 835 fsc->sb = NULL; 836 fsc->mount_state = CEPH_MOUNT_MOUNTING; 837 fsc->filp_gen = 1; 838 fsc->have_copy_from2 = true; 839 840 atomic_long_set(&fsc->writeback_count, 0); 841 fsc->write_congested = false; 842 843 err = -ENOMEM; 844 /* 845 * The number of concurrent works can be high but they don't need 846 * to be processed in parallel, limit concurrency. 847 */ 848 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 849 if (!fsc->inode_wq) 850 goto fail_client; 851 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 852 if (!fsc->cap_wq) 853 goto fail_inode_wq; 854 855 hash_init(fsc->async_unlink_conflict); 856 spin_lock_init(&fsc->async_unlink_conflict_lock); 857 858 spin_lock(&ceph_fsc_lock); 859 list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); 860 spin_unlock(&ceph_fsc_lock); 861 862 return fsc; 863 864 fail_inode_wq: 865 destroy_workqueue(fsc->inode_wq); 866 fail_client: 867 ceph_destroy_client(fsc->client); 868 fail: 869 kfree(fsc); 870 if (opt) 871 ceph_destroy_options(opt); 872 destroy_mount_options(fsopt); 873 return ERR_PTR(err); 874 } 875 876 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 877 { 878 flush_workqueue(fsc->inode_wq); 879 flush_workqueue(fsc->cap_wq); 880 } 881 882 static void destroy_fs_client(struct ceph_fs_client *fsc) 883 { 884 dout("destroy_fs_client %p\n", fsc); 885 886 spin_lock(&ceph_fsc_lock); 887 list_del(&fsc->metric_wakeup); 888 spin_unlock(&ceph_fsc_lock); 889 890 ceph_mdsc_destroy(fsc); 891 destroy_workqueue(fsc->inode_wq); 892 destroy_workqueue(fsc->cap_wq); 893 894 destroy_mount_options(fsc->mount_options); 895 896 ceph_destroy_client(fsc->client); 897 898 kfree(fsc); 899 dout("destroy_fs_client %p done\n", fsc); 900 } 901 902 /* 903 * caches 904 */ 905 struct kmem_cache *ceph_inode_cachep; 906 struct kmem_cache *ceph_cap_cachep; 907 struct kmem_cache *ceph_cap_snap_cachep; 908 struct kmem_cache *ceph_cap_flush_cachep; 909 struct kmem_cache *ceph_dentry_cachep; 910 struct kmem_cache *ceph_file_cachep; 911 struct kmem_cache *ceph_dir_file_cachep; 912 struct kmem_cache *ceph_mds_request_cachep; 913 mempool_t *ceph_wb_pagevec_pool; 914 915 static void ceph_inode_init_once(void *foo) 916 { 917 struct ceph_inode_info *ci = foo; 918 inode_init_once(&ci->netfs.inode); 919 } 920 921 static int __init init_caches(void) 922 { 923 int error = -ENOMEM; 924 925 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 926 sizeof(struct ceph_inode_info), 927 __alignof__(struct ceph_inode_info), 928 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 929 SLAB_ACCOUNT, ceph_inode_init_once); 930 if (!ceph_inode_cachep) 931 return -ENOMEM; 932 933 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 934 if (!ceph_cap_cachep) 935 goto bad_cap; 936 ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); 937 if (!ceph_cap_snap_cachep) 938 goto bad_cap_snap; 939 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 940 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 941 if (!ceph_cap_flush_cachep) 942 goto bad_cap_flush; 943 944 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 945 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 946 if (!ceph_dentry_cachep) 947 goto bad_dentry; 948 949 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 950 if (!ceph_file_cachep) 951 goto bad_file; 952 953 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 954 if (!ceph_dir_file_cachep) 955 goto bad_dir_file; 956 957 ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); 958 if (!ceph_mds_request_cachep) 959 goto bad_mds_req; 960 961 ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); 962 if (!ceph_wb_pagevec_pool) 963 goto bad_pagevec_pool; 964 965 return 0; 966 967 bad_pagevec_pool: 968 kmem_cache_destroy(ceph_mds_request_cachep); 969 bad_mds_req: 970 kmem_cache_destroy(ceph_dir_file_cachep); 971 bad_dir_file: 972 kmem_cache_destroy(ceph_file_cachep); 973 bad_file: 974 kmem_cache_destroy(ceph_dentry_cachep); 975 bad_dentry: 976 kmem_cache_destroy(ceph_cap_flush_cachep); 977 bad_cap_flush: 978 kmem_cache_destroy(ceph_cap_snap_cachep); 979 bad_cap_snap: 980 kmem_cache_destroy(ceph_cap_cachep); 981 bad_cap: 982 kmem_cache_destroy(ceph_inode_cachep); 983 return error; 984 } 985 986 static void destroy_caches(void) 987 { 988 /* 989 * Make sure all delayed rcu free inodes are flushed before we 990 * destroy cache. 991 */ 992 rcu_barrier(); 993 994 kmem_cache_destroy(ceph_inode_cachep); 995 kmem_cache_destroy(ceph_cap_cachep); 996 kmem_cache_destroy(ceph_cap_snap_cachep); 997 kmem_cache_destroy(ceph_cap_flush_cachep); 998 kmem_cache_destroy(ceph_dentry_cachep); 999 kmem_cache_destroy(ceph_file_cachep); 1000 kmem_cache_destroy(ceph_dir_file_cachep); 1001 kmem_cache_destroy(ceph_mds_request_cachep); 1002 mempool_destroy(ceph_wb_pagevec_pool); 1003 } 1004 1005 static void __ceph_umount_begin(struct ceph_fs_client *fsc) 1006 { 1007 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 1008 ceph_mdsc_force_umount(fsc->mdsc); 1009 fsc->filp_gen++; // invalidate open files 1010 } 1011 1012 /* 1013 * ceph_umount_begin - initiate forced umount. Tear down the 1014 * mount, skipping steps that may hang while waiting for server(s). 1015 */ 1016 void ceph_umount_begin(struct super_block *sb) 1017 { 1018 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1019 1020 dout("ceph_umount_begin - starting forced umount\n"); 1021 if (!fsc) 1022 return; 1023 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 1024 __ceph_umount_begin(fsc); 1025 } 1026 1027 static const struct super_operations ceph_super_ops = { 1028 .alloc_inode = ceph_alloc_inode, 1029 .free_inode = ceph_free_inode, 1030 .write_inode = ceph_write_inode, 1031 .drop_inode = generic_delete_inode, 1032 .evict_inode = ceph_evict_inode, 1033 .sync_fs = ceph_sync_fs, 1034 .put_super = ceph_put_super, 1035 .show_options = ceph_show_options, 1036 .statfs = ceph_statfs, 1037 .umount_begin = ceph_umount_begin, 1038 }; 1039 1040 /* 1041 * Bootstrap mount by opening the root directory. Note the mount 1042 * @started time from caller, and time out if this takes too long. 1043 */ 1044 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 1045 const char *path, 1046 unsigned long started) 1047 { 1048 struct ceph_mds_client *mdsc = fsc->mdsc; 1049 struct ceph_mds_request *req = NULL; 1050 int err; 1051 struct dentry *root; 1052 1053 /* open dir */ 1054 dout("open_root_inode opening '%s'\n", path); 1055 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1056 if (IS_ERR(req)) 1057 return ERR_CAST(req); 1058 req->r_path1 = kstrdup(path, GFP_NOFS); 1059 if (!req->r_path1) { 1060 root = ERR_PTR(-ENOMEM); 1061 goto out; 1062 } 1063 1064 req->r_ino1.ino = CEPH_INO_ROOT; 1065 req->r_ino1.snap = CEPH_NOSNAP; 1066 req->r_started = started; 1067 req->r_timeout = fsc->client->options->mount_timeout; 1068 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 1069 req->r_num_caps = 2; 1070 err = ceph_mdsc_do_request(mdsc, NULL, req); 1071 if (err == 0) { 1072 struct inode *inode = req->r_target_inode; 1073 req->r_target_inode = NULL; 1074 dout("open_root_inode success\n"); 1075 root = d_make_root(inode); 1076 if (!root) { 1077 root = ERR_PTR(-ENOMEM); 1078 goto out; 1079 } 1080 dout("open_root_inode success, root dentry is %p\n", root); 1081 } else { 1082 root = ERR_PTR(err); 1083 } 1084 out: 1085 ceph_mdsc_put_request(req); 1086 return root; 1087 } 1088 1089 #ifdef CONFIG_FS_ENCRYPTION 1090 static int ceph_apply_test_dummy_encryption(struct super_block *sb, 1091 struct fs_context *fc, 1092 struct ceph_mount_options *fsopt) 1093 { 1094 struct ceph_fs_client *fsc = sb->s_fs_info; 1095 1096 if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy)) 1097 return 0; 1098 1099 /* No changing encryption context on remount. */ 1100 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && 1101 !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { 1102 if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy, 1103 &fsc->fsc_dummy_enc_policy)) 1104 return 0; 1105 errorfc(fc, "Can't set test_dummy_encryption on remount"); 1106 return -EINVAL; 1107 } 1108 1109 /* Also make sure fsopt doesn't contain a conflicting value. */ 1110 if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) { 1111 if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy, 1112 &fsc->fsc_dummy_enc_policy)) 1113 return 0; 1114 errorfc(fc, "Conflicting test_dummy_encryption options"); 1115 return -EINVAL; 1116 } 1117 1118 fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy; 1119 memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy)); 1120 1121 warnfc(fc, "test_dummy_encryption mode enabled"); 1122 return 0; 1123 } 1124 #else 1125 static int ceph_apply_test_dummy_encryption(struct super_block *sb, 1126 struct fs_context *fc, 1127 struct ceph_mount_options *fsopt) 1128 { 1129 return 0; 1130 } 1131 #endif 1132 1133 /* 1134 * mount: join the ceph cluster, and open root directory. 1135 */ 1136 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 1137 struct fs_context *fc) 1138 { 1139 int err; 1140 unsigned long started = jiffies; /* note the start time */ 1141 struct dentry *root; 1142 1143 dout("mount start %p\n", fsc); 1144 mutex_lock(&fsc->client->mount_mutex); 1145 1146 if (!fsc->sb->s_root) { 1147 const char *path = fsc->mount_options->server_path ? 1148 fsc->mount_options->server_path + 1 : ""; 1149 1150 err = __ceph_open_session(fsc->client, started); 1151 if (err < 0) 1152 goto out; 1153 1154 /* setup fscache */ 1155 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 1156 err = ceph_fscache_register_fs(fsc, fc); 1157 if (err < 0) 1158 goto out; 1159 } 1160 1161 err = ceph_apply_test_dummy_encryption(fsc->sb, fc, 1162 fsc->mount_options); 1163 if (err) 1164 goto out; 1165 1166 dout("mount opening path '%s'\n", path); 1167 1168 ceph_fs_debugfs_init(fsc); 1169 1170 root = open_root_dentry(fsc, path, started); 1171 if (IS_ERR(root)) { 1172 err = PTR_ERR(root); 1173 goto out; 1174 } 1175 fsc->sb->s_root = dget(root); 1176 } else { 1177 root = dget(fsc->sb->s_root); 1178 } 1179 1180 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1181 dout("mount success\n"); 1182 mutex_unlock(&fsc->client->mount_mutex); 1183 return root; 1184 1185 out: 1186 mutex_unlock(&fsc->client->mount_mutex); 1187 ceph_fscrypt_free_dummy_policy(fsc); 1188 return ERR_PTR(err); 1189 } 1190 1191 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1192 { 1193 struct ceph_fs_client *fsc = s->s_fs_info; 1194 int ret; 1195 1196 dout("set_super %p\n", s); 1197 1198 s->s_maxbytes = MAX_LFS_FILESIZE; 1199 1200 s->s_xattr = ceph_xattr_handlers; 1201 fsc->sb = s; 1202 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1203 1204 s->s_op = &ceph_super_ops; 1205 s->s_d_op = &ceph_dentry_ops; 1206 s->s_export_op = &ceph_export_ops; 1207 1208 s->s_time_gran = 1; 1209 s->s_time_min = 0; 1210 s->s_time_max = U32_MAX; 1211 s->s_flags |= SB_NODIRATIME | SB_NOATIME; 1212 1213 ceph_fscrypt_set_ops(s); 1214 1215 ret = set_anon_super_fc(s, fc); 1216 if (ret != 0) 1217 fsc->sb = NULL; 1218 return ret; 1219 } 1220 1221 /* 1222 * share superblock if same fs AND options 1223 */ 1224 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1225 { 1226 struct ceph_fs_client *new = fc->s_fs_info; 1227 struct ceph_mount_options *fsopt = new->mount_options; 1228 struct ceph_options *opt = new->client->options; 1229 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1230 1231 dout("ceph_compare_super %p\n", sb); 1232 1233 if (compare_mount_options(fsopt, opt, fsc)) { 1234 dout("monitor(s)/mount options don't match\n"); 1235 return 0; 1236 } 1237 if ((opt->flags & CEPH_OPT_FSID) && 1238 ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { 1239 dout("fsid doesn't match\n"); 1240 return 0; 1241 } 1242 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1243 dout("flags differ\n"); 1244 return 0; 1245 } 1246 1247 if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { 1248 dout("client is blocklisted (and CLEANRECOVER is not set)\n"); 1249 return 0; 1250 } 1251 1252 if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { 1253 dout("client has been forcibly unmounted\n"); 1254 return 0; 1255 } 1256 1257 return 1; 1258 } 1259 1260 /* 1261 * construct our own bdi so we can control readahead, etc. 1262 */ 1263 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1264 1265 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1266 { 1267 int err; 1268 1269 err = super_setup_bdi_name(sb, "ceph-%ld", 1270 atomic_long_inc_return(&bdi_seq)); 1271 if (err) 1272 return err; 1273 1274 /* set ra_pages based on rasize mount option? */ 1275 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1276 1277 /* set io_pages based on max osd read size */ 1278 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1279 1280 return 0; 1281 } 1282 1283 static int ceph_get_tree(struct fs_context *fc) 1284 { 1285 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1286 struct ceph_mount_options *fsopt = pctx->opts; 1287 struct super_block *sb; 1288 struct ceph_fs_client *fsc; 1289 struct dentry *res; 1290 int (*compare_super)(struct super_block *, struct fs_context *) = 1291 ceph_compare_super; 1292 int err; 1293 1294 dout("ceph_get_tree\n"); 1295 1296 if (!fc->source) 1297 return invalfc(fc, "No source"); 1298 if (fsopt->new_dev_syntax && !fsopt->mon_addr) 1299 return invalfc(fc, "No monitor address"); 1300 1301 /* create client (which we may/may not use) */ 1302 fsc = create_fs_client(pctx->opts, pctx->copts); 1303 pctx->opts = NULL; 1304 pctx->copts = NULL; 1305 if (IS_ERR(fsc)) { 1306 err = PTR_ERR(fsc); 1307 goto out_final; 1308 } 1309 1310 err = ceph_mdsc_init(fsc); 1311 if (err < 0) 1312 goto out; 1313 1314 if (ceph_test_opt(fsc->client, NOSHARE)) 1315 compare_super = NULL; 1316 1317 fc->s_fs_info = fsc; 1318 sb = sget_fc(fc, compare_super, ceph_set_super); 1319 fc->s_fs_info = NULL; 1320 if (IS_ERR(sb)) { 1321 err = PTR_ERR(sb); 1322 goto out; 1323 } 1324 1325 if (ceph_sb_to_fs_client(sb) != fsc) { 1326 destroy_fs_client(fsc); 1327 fsc = ceph_sb_to_fs_client(sb); 1328 dout("get_sb got existing client %p\n", fsc); 1329 } else { 1330 dout("get_sb using new client %p\n", fsc); 1331 err = ceph_setup_bdi(sb, fsc); 1332 if (err < 0) 1333 goto out_splat; 1334 } 1335 1336 res = ceph_real_mount(fsc, fc); 1337 if (IS_ERR(res)) { 1338 err = PTR_ERR(res); 1339 goto out_splat; 1340 } 1341 dout("root %p inode %p ino %llx.%llx\n", res, 1342 d_inode(res), ceph_vinop(d_inode(res))); 1343 fc->root = fsc->sb->s_root; 1344 return 0; 1345 1346 out_splat: 1347 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1348 pr_info("No mds server is up or the cluster is laggy\n"); 1349 err = -EHOSTUNREACH; 1350 } 1351 1352 ceph_mdsc_close_sessions(fsc->mdsc); 1353 deactivate_locked_super(sb); 1354 goto out_final; 1355 1356 out: 1357 destroy_fs_client(fsc); 1358 out_final: 1359 dout("ceph_get_tree fail %d\n", err); 1360 return err; 1361 } 1362 1363 static void ceph_free_fc(struct fs_context *fc) 1364 { 1365 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1366 1367 if (pctx) { 1368 destroy_mount_options(pctx->opts); 1369 ceph_destroy_options(pctx->copts); 1370 kfree(pctx); 1371 } 1372 } 1373 1374 static int ceph_reconfigure_fc(struct fs_context *fc) 1375 { 1376 int err; 1377 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1378 struct ceph_mount_options *fsopt = pctx->opts; 1379 struct super_block *sb = fc->root->d_sb; 1380 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1381 1382 err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); 1383 if (err) 1384 return err; 1385 1386 if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) 1387 ceph_set_mount_opt(fsc, ASYNC_DIROPS); 1388 else 1389 ceph_clear_mount_opt(fsc, ASYNC_DIROPS); 1390 1391 if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD) 1392 ceph_set_mount_opt(fsc, SPARSEREAD); 1393 else 1394 ceph_clear_mount_opt(fsc, SPARSEREAD); 1395 1396 if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) { 1397 kfree(fsc->mount_options->mon_addr); 1398 fsc->mount_options->mon_addr = fsopt->mon_addr; 1399 fsopt->mon_addr = NULL; 1400 pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); 1401 } 1402 1403 sync_filesystem(sb); 1404 return 0; 1405 } 1406 1407 static const struct fs_context_operations ceph_context_ops = { 1408 .free = ceph_free_fc, 1409 .parse_param = ceph_parse_mount_param, 1410 .get_tree = ceph_get_tree, 1411 .reconfigure = ceph_reconfigure_fc, 1412 }; 1413 1414 /* 1415 * Set up the filesystem mount context. 1416 */ 1417 static int ceph_init_fs_context(struct fs_context *fc) 1418 { 1419 struct ceph_parse_opts_ctx *pctx; 1420 struct ceph_mount_options *fsopt; 1421 1422 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1423 if (!pctx) 1424 return -ENOMEM; 1425 1426 pctx->copts = ceph_alloc_options(); 1427 if (!pctx->copts) 1428 goto nomem; 1429 1430 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1431 if (!pctx->opts) 1432 goto nomem; 1433 1434 fsopt = pctx->opts; 1435 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1436 1437 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1438 fsopt->rsize = CEPH_MAX_READ_SIZE; 1439 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1440 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1441 if (!fsopt->snapdir_name) 1442 goto nomem; 1443 1444 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1445 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1446 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1447 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1448 fsopt->congestion_kb = default_congestion_kb(); 1449 1450 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1451 fc->sb_flags |= SB_POSIXACL; 1452 #endif 1453 1454 fc->fs_private = pctx; 1455 fc->ops = &ceph_context_ops; 1456 return 0; 1457 1458 nomem: 1459 destroy_mount_options(pctx->opts); 1460 ceph_destroy_options(pctx->copts); 1461 kfree(pctx); 1462 return -ENOMEM; 1463 } 1464 1465 /* 1466 * Return true if it successfully increases the blocker counter, 1467 * or false if the mdsc is in stopping and flushed state. 1468 */ 1469 static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc) 1470 { 1471 spin_lock(&mdsc->stopping_lock); 1472 if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) { 1473 spin_unlock(&mdsc->stopping_lock); 1474 return false; 1475 } 1476 atomic_inc(&mdsc->stopping_blockers); 1477 spin_unlock(&mdsc->stopping_lock); 1478 return true; 1479 } 1480 1481 static void __dec_stopping_blocker(struct ceph_mds_client *mdsc) 1482 { 1483 spin_lock(&mdsc->stopping_lock); 1484 if (!atomic_dec_return(&mdsc->stopping_blockers) && 1485 mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) 1486 complete_all(&mdsc->stopping_waiter); 1487 spin_unlock(&mdsc->stopping_lock); 1488 } 1489 1490 /* For metadata IO requests */ 1491 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc, 1492 struct ceph_mds_session *session) 1493 { 1494 mutex_lock(&session->s_mutex); 1495 inc_session_sequence(session); 1496 mutex_unlock(&session->s_mutex); 1497 1498 return __inc_stopping_blocker(mdsc); 1499 } 1500 1501 void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc) 1502 { 1503 __dec_stopping_blocker(mdsc); 1504 } 1505 1506 /* For data IO requests */ 1507 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1508 { 1509 return __inc_stopping_blocker(mdsc); 1510 } 1511 1512 void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) 1513 { 1514 __dec_stopping_blocker(mdsc); 1515 } 1516 1517 static void ceph_kill_sb(struct super_block *s) 1518 { 1519 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); 1520 struct ceph_mds_client *mdsc = fsc->mdsc; 1521 bool wait; 1522 1523 dout("kill_sb %p\n", s); 1524 1525 ceph_mdsc_pre_umount(mdsc); 1526 flush_fs_workqueues(fsc); 1527 1528 /* 1529 * Though the kill_anon_super() will finally trigger the 1530 * sync_filesystem() anyway, we still need to do it here and 1531 * then bump the stage of shutdown. This will allow us to 1532 * drop any further message, which will increase the inodes' 1533 * i_count reference counters but makes no sense any more, 1534 * from MDSs. 1535 * 1536 * Without this when evicting the inodes it may fail in the 1537 * kill_anon_super(), which will trigger a warning when 1538 * destroying the fscrypt keyring and then possibly trigger 1539 * a further crash in ceph module when the iput() tries to 1540 * evict the inodes later. 1541 */ 1542 sync_filesystem(s); 1543 1544 spin_lock(&mdsc->stopping_lock); 1545 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING; 1546 wait = !!atomic_read(&mdsc->stopping_blockers); 1547 spin_unlock(&mdsc->stopping_lock); 1548 1549 if (wait && atomic_read(&mdsc->stopping_blockers)) { 1550 long timeleft = wait_for_completion_killable_timeout( 1551 &mdsc->stopping_waiter, 1552 fsc->client->options->mount_timeout); 1553 if (!timeleft) /* timed out */ 1554 pr_warn("umount timed out, %ld\n", timeleft); 1555 else if (timeleft < 0) /* killed */ 1556 pr_warn("umount was killed, %ld\n", timeleft); 1557 } 1558 1559 mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; 1560 kill_anon_super(s); 1561 1562 fsc->client->extra_mon_dispatch = NULL; 1563 ceph_fs_debugfs_cleanup(fsc); 1564 1565 ceph_fscache_unregister_fs(fsc); 1566 1567 destroy_fs_client(fsc); 1568 } 1569 1570 static struct file_system_type ceph_fs_type = { 1571 .owner = THIS_MODULE, 1572 .name = "ceph", 1573 .init_fs_context = ceph_init_fs_context, 1574 .kill_sb = ceph_kill_sb, 1575 .fs_flags = FS_RENAME_DOES_D_MOVE, 1576 }; 1577 MODULE_ALIAS_FS("ceph"); 1578 1579 int ceph_force_reconnect(struct super_block *sb) 1580 { 1581 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1582 int err = 0; 1583 1584 fsc->mount_state = CEPH_MOUNT_RECOVER; 1585 __ceph_umount_begin(fsc); 1586 1587 /* Make sure all page caches get invalidated. 1588 * see remove_session_caps_cb() */ 1589 flush_workqueue(fsc->inode_wq); 1590 1591 /* In case that we were blocklisted. This also reset 1592 * all mon/osd connections */ 1593 ceph_reset_client_addr(fsc->client); 1594 1595 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1596 1597 fsc->blocklisted = false; 1598 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1599 1600 if (sb->s_root) { 1601 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1602 CEPH_STAT_CAP_INODE, true); 1603 } 1604 return err; 1605 } 1606 1607 static int __init init_ceph(void) 1608 { 1609 int ret = init_caches(); 1610 if (ret) 1611 goto out; 1612 1613 ceph_flock_init(); 1614 ret = register_filesystem(&ceph_fs_type); 1615 if (ret) 1616 goto out_caches; 1617 1618 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1619 1620 return 0; 1621 1622 out_caches: 1623 destroy_caches(); 1624 out: 1625 return ret; 1626 } 1627 1628 static void __exit exit_ceph(void) 1629 { 1630 dout("exit_ceph\n"); 1631 unregister_filesystem(&ceph_fs_type); 1632 destroy_caches(); 1633 } 1634 1635 static int param_set_metrics(const char *val, const struct kernel_param *kp) 1636 { 1637 struct ceph_fs_client *fsc; 1638 int ret; 1639 1640 ret = param_set_bool(val, kp); 1641 if (ret) { 1642 pr_err("Failed to parse sending metrics switch value '%s'\n", 1643 val); 1644 return ret; 1645 } else if (!disable_send_metrics) { 1646 // wake up all the mds clients 1647 spin_lock(&ceph_fsc_lock); 1648 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { 1649 metric_schedule_delayed(&fsc->mdsc->metric); 1650 } 1651 spin_unlock(&ceph_fsc_lock); 1652 } 1653 1654 return 0; 1655 } 1656 1657 static const struct kernel_param_ops param_ops_metrics = { 1658 .set = param_set_metrics, 1659 .get = param_get_bool, 1660 }; 1661 1662 bool disable_send_metrics = false; 1663 module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); 1664 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); 1665 1666 /* for both v1 and v2 syntax */ 1667 static bool mount_support = true; 1668 static const struct kernel_param_ops param_ops_mount_syntax = { 1669 .get = param_get_bool, 1670 }; 1671 module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); 1672 module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); 1673 1674 module_init(init_ceph); 1675 module_exit(exit_ceph); 1676 1677 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1678 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1679 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1680 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1681 MODULE_LICENSE("GPL"); 1682