// SPDX-License-Identifier: GPL-2.0-only

#include <linux/ceph/ceph_debug.h>

#include <linux/backing-dev.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/statfs.h>
#include <linux/string.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"

#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>

/*
 * Ceph superblock operations
 *
 * Handle the basics of mounting, unmounting.
 */

/*
 * super ops
 */
static void ceph_put_super(struct super_block *s)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);

        dout("put_super\n");
        ceph_mdsc_close_sessions(fsc->mdsc);
}

static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
        struct ceph_mon_client *monc = &fsc->client->monc;
        struct ceph_statfs st;
        u64 fsid;
        int err;
        u64 data_pool;

        if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1)
                data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
        else
                data_pool = CEPH_NOPOOL;

        dout("statfs\n");
        err = ceph_monc_do_statfs(monc, data_pool, &st);
        if (err < 0)
                return err;

        /* fill in kstatfs */
        buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */

        /*
         * express utilization in terms of large blocks to avoid
         * overflow on 32-bit machines.
         *
         * NOTE: for the time being, we make bsize == frsize to humor
         * not-yet-ancient versions of glibc that are broken.
         * Someday, we will probably want to report a real block
         * size... whatever that may mean for a network file system!
         */
        buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
        buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;

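        /*
         * st.kb and st.kb_avail below are reported in KiB; shifting right
         * by (CEPH_BLOCK_SHIFT - 10) converts them into CEPH_BLOCK-sized
         * units (4 MiB blocks, assuming the usual CEPH_BLOCK_SHIFT of 22),
         * matching the f_bsize/f_frsize reported above.
         */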
        /*
         * By default use root quota for stats; fallback to overall filesystem
         * usage if using 'noquotadf' mount option or if the root dir doesn't
         * have max_bytes quota set.
         */
        if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
            !ceph_quota_update_statfs(fsc, buf)) {
                buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
                buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
                buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
        }

        buf->f_files = le64_to_cpu(st.num_objects);
        buf->f_ffree = -1;
        buf->f_namelen = NAME_MAX;

        /* Must convert the fsid, for consistent values across arches */
        mutex_lock(&monc->mutex);
        fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
               le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
        mutex_unlock(&monc->mutex);

        buf->f_fsid.val[0] = fsid & 0xffffffff;
        buf->f_fsid.val[1] = fsid >> 32;

        return 0;
}


static int ceph_sync_fs(struct super_block *sb, int wait)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

        if (!wait) {
                dout("sync_fs (non-blocking)\n");
                ceph_flush_dirty_caps(fsc->mdsc);
                dout("sync_fs (non-blocking) done\n");
                return 0;
        }

        dout("sync_fs (blocking)\n");
        ceph_osdc_sync(&fsc->client->osdc);
        ceph_mdsc_sync(fsc->mdsc);
        dout("sync_fs (blocking) done\n");
        return 0;
}

/*
 * mount options
 */
enum {
        Opt_wsize,
        Opt_rsize,
        Opt_rasize,
        Opt_caps_wanted_delay_min,
        Opt_caps_wanted_delay_max,
        Opt_caps_max,
        Opt_readdir_max_entries,
        Opt_readdir_max_bytes,
        Opt_congestion_kb,
        /* int args above */
        Opt_snapdirname,
        Opt_mds_namespace,
        Opt_recover_session,
        Opt_source,
        /* string args above */
        Opt_dirstat,
        Opt_rbytes,
        Opt_asyncreaddir,
        Opt_dcache,
        Opt_ino32,
        Opt_fscache,
        Opt_poolperm,
        Opt_require_active_mds,
        Opt_acl,
        Opt_quotadf,
        Opt_copyfrom,
};

enum ceph_recover_session_mode {
        ceph_recover_session_no,
        ceph_recover_session_clean
};

static const struct fs_parameter_enum ceph_mount_param_enums[] = {
        { Opt_recover_session, "no", ceph_recover_session_no },
        { Opt_recover_session, "clean", ceph_recover_session_clean },
        {}
};

static const struct fs_parameter_spec ceph_mount_param_specs[] = {
        fsparam_flag_no ("acl", Opt_acl),
        fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
        fsparam_u32 ("caps_max", Opt_caps_max),
        fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
        fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
        fsparam_s32 ("write_congestion_kb", Opt_congestion_kb),
        fsparam_flag_no ("copyfrom", Opt_copyfrom),
        fsparam_flag_no ("dcache", Opt_dcache),
        fsparam_flag_no ("dirstat", Opt_dirstat),
        __fsparam (fs_param_is_string, "fsc", Opt_fscache,
                   fs_param_neg_with_no | fs_param_v_optional),
        fsparam_flag_no ("ino32", Opt_ino32),
        fsparam_string ("mds_namespace", Opt_mds_namespace),
        fsparam_flag_no ("poolperm", Opt_poolperm),
        fsparam_flag_no ("quotadf", Opt_quotadf),
        fsparam_u32 ("rasize", Opt_rasize),
        fsparam_flag_no ("rbytes", Opt_rbytes),
        fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes),
        fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries),
        fsparam_enum ("recover_session", Opt_recover_session),
        fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
        fsparam_u32 ("rsize", Opt_rsize),
        fsparam_string ("snapdirname", Opt_snapdirname),
        fsparam_string ("source", Opt_source),
        fsparam_u32 ("wsize", Opt_wsize),
        {}
};

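/*
 * Every fsparam_flag_no() entry above also accepts a "no"-prefixed form
 * (e.g. "dcache"/"nodcache", "acl"/"noacl"), and "recover_session" takes
 * one of the values listed in ceph_mount_param_enums.
 */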
static const struct fs_parameter_description ceph_mount_parameters = {
        .name  = "ceph",
        .specs = ceph_mount_param_specs,
        .enums = ceph_mount_param_enums,
};

struct ceph_parse_opts_ctx {
        struct ceph_options *copts;
        struct ceph_mount_options *opts;
};

/*
 * Parse the source parameter. Distinguish the server list from the path.
 * Internally we do not include the leading '/' in the path.
 *
 * The source will look like:
 *     <server_spec>[,<server_spec>...]:[<path>]
 * where
 *     <server_spec> is <ip>[:<port>]
 *     <path> is optional, but if present must begin with '/'
 */
static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
{
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
        struct ceph_mount_options *fsopt = pctx->opts;
        char *dev_name = param->string, *dev_name_end;
        int ret;

        dout("%s '%s'\n", __func__, dev_name);
        if (!dev_name || !*dev_name)
                return invalf(fc, "ceph: Empty source");

        dev_name_end = strchr(dev_name, '/');
        if (dev_name_end) {
                if (strlen(dev_name_end) > 1) {
                        kfree(fsopt->server_path);
                        fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
                        if (!fsopt->server_path)
                                return -ENOMEM;
                }
        } else {
                dev_name_end = dev_name + strlen(dev_name);
        }

        dev_name_end--;         /* back up to ':' separator */
        if (dev_name_end < dev_name || *dev_name_end != ':')
                return invalf(fc, "ceph: No path or : separator in source");

        dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
        if (fsopt->server_path)
                dout("server path '%s'\n", fsopt->server_path);

        ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
                                 pctx->copts, fc);
        if (ret)
                return ret;

        fc->source = param->string;
        param->string = NULL;
        return 0;
}

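/*
 * Parse a single mount option.  Options understood by libceph are handled
 * by ceph_parse_param(); anything it does not recognize (-ENOPARAM) is
 * matched against the filesystem parameters defined above.
 */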
static int ceph_parse_mount_param(struct fs_context *fc,
                                  struct fs_parameter *param)
{
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
        struct ceph_mount_options *fsopt = pctx->opts;
        struct fs_parse_result result;
        unsigned int mode;
        int token, ret;

        ret = ceph_parse_param(param, pctx->copts, fc);
        if (ret != -ENOPARAM)
                return ret;

        token = fs_parse(fc, &ceph_mount_parameters, param, &result);
        dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
        if (token < 0)
                return token;

        switch (token) {
        case Opt_snapdirname:
                kfree(fsopt->snapdir_name);
                fsopt->snapdir_name = param->string;
                param->string = NULL;
                break;
        case Opt_mds_namespace:
                kfree(fsopt->mds_namespace);
                fsopt->mds_namespace = param->string;
                param->string = NULL;
                break;
        case Opt_recover_session:
                mode = result.uint_32;
                if (mode == ceph_recover_session_no)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
                else if (mode == ceph_recover_session_clean)
                        fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
                else
                        BUG();
                break;
        case Opt_source:
                if (fc->source)
                        return invalf(fc, "ceph: Multiple sources specified");
                return ceph_parse_source(param, fc);
        case Opt_wsize:
                if (result.uint_32 < PAGE_SIZE ||
                    result.uint_32 > CEPH_MAX_WRITE_SIZE)
                        goto out_of_range;
                fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE);
                break;
        case Opt_rsize:
                if (result.uint_32 < PAGE_SIZE ||
                    result.uint_32 > CEPH_MAX_READ_SIZE)
                        goto out_of_range;
                fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE);
                break;
        case Opt_rasize:
                fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE);
                break;
        case Opt_caps_wanted_delay_min:
                if (result.uint_32 < 1)
                        goto out_of_range;
                fsopt->caps_wanted_delay_min = result.uint_32;
                break;
        case Opt_caps_wanted_delay_max:
                if (result.uint_32 < 1)
                        goto out_of_range;
                fsopt->caps_wanted_delay_max = result.uint_32;
                break;
        case Opt_caps_max:
                fsopt->caps_max = result.uint_32;
                break;
        case Opt_readdir_max_entries:
                if (result.uint_32 < 1)
                        goto out_of_range;
                fsopt->max_readdir = result.uint_32;
                break;
        case Opt_readdir_max_bytes:
                if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0)
                        goto out_of_range;
                fsopt->max_readdir_bytes = result.uint_32;
                break;
        case Opt_congestion_kb:
                if (result.uint_32 < 1024) /* at least 1M */
                        goto out_of_range;
                fsopt->congestion_kb = result.uint_32;
                break;
        case Opt_dirstat:
                if (!result.negated)
                        fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
                break;
        case Opt_rbytes:
                if (!result.negated)
                        fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
                break;
        case Opt_asyncreaddir:
                if (!result.negated)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
                break;
        case Opt_dcache:
                if (!result.negated)
                        fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
                break;
        case Opt_ino32:
                if (!result.negated)
                        fsopt->flags |= CEPH_MOUNT_OPT_INO32;
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
                break;

        case Opt_fscache:
#ifdef CONFIG_CEPH_FSCACHE
                kfree(fsopt->fscache_uniq);
                fsopt->fscache_uniq = NULL;
                if (result.negated) {
                        fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
                } else {
                        fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
                        fsopt->fscache_uniq = param->string;
                        param->string = NULL;
                }
                break;
#else
                return invalf(fc, "ceph: fscache support is disabled");
#endif
        case Opt_poolperm:
                if (!result.negated)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
                break;
        case Opt_require_active_mds:
                if (!result.negated)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
                break;
        case Opt_quotadf:
                if (!result.negated)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
                break;
        case Opt_copyfrom:
                if (!result.negated)
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
                else
                        fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
                break;
        case Opt_acl:
                if (!result.negated) {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
                        fc->sb_flags |= SB_POSIXACL;
#else
                        return invalf(fc, "ceph: POSIX ACL support is disabled");
#endif
                } else {
                        fc->sb_flags &= ~SB_POSIXACL;
                }
                break;
        default:
                BUG();
        }
        return 0;

out_of_range:
        return invalf(fc, "ceph: %s out of range", param->key);
}

static void destroy_mount_options(struct ceph_mount_options *args)
{
        dout("destroy_mount_options %p\n", args);
        if (!args)
                return;

        kfree(args->snapdir_name);
        kfree(args->mds_namespace);
        kfree(args->server_path);
        kfree(args->fscache_uniq);
        kfree(args);
}

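/*
 * Like strcmp(), but tolerate NULL pointers: two NULL strings compare
 * equal, and a NULL string compares unequal to any non-NULL string.
 */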
static int strcmp_null(const char *s1, const char *s2)
{
        if (!s1 && !s2)
                return 0;
        if (s1 && !s2)
                return -1;
        if (!s1 && s2)
                return 1;
        return strcmp(s1, s2);
}

static int compare_mount_options(struct ceph_mount_options *new_fsopt,
                                 struct ceph_options *new_opt,
                                 struct ceph_fs_client *fsc)
{
        struct ceph_mount_options *fsopt1 = new_fsopt;
        struct ceph_mount_options *fsopt2 = fsc->mount_options;
        int ofs = offsetof(struct ceph_mount_options, snapdir_name);
        int ret;

        ret = memcmp(fsopt1, fsopt2, ofs);
        if (ret)
                return ret;

        ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
        if (ret)
                return ret;
        ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
        if (ret)
                return ret;
        ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
        if (ret)
                return ret;
        ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
        if (ret)
                return ret;

        return ceph_compare_options(new_opt, fsc->client);
}

/**
 * ceph_show_options - Show mount options in /proc/mounts
 * @m: seq_file to write to
 * @root: root of that (sub)tree
 */
static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
        struct ceph_mount_options *fsopt = fsc->mount_options;
        size_t pos;
        int ret;

        /* a comma between MNT/MS and client options */
        seq_putc(m, ',');
        pos = m->count;

        ret = ceph_print_client_options(m, fsc->client, false);
        if (ret)
                return ret;

        /* retract our comma if no client options */
        if (m->count == pos)
                m->count--;

        if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
                seq_puts(m, ",dirstat");
        if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
                seq_puts(m, ",rbytes");
        if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
                seq_puts(m, ",noasyncreaddir");
        if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
                seq_puts(m, ",nodcache");
        if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
                seq_show_option(m, "fsc", fsopt->fscache_uniq);
        }
        if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
                seq_puts(m, ",nopoolperm");
        if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
                seq_puts(m, ",noquotadf");

#ifdef CONFIG_CEPH_FS_POSIX_ACL
        if (root->d_sb->s_flags & SB_POSIXACL)
                seq_puts(m, ",acl");
        else
                seq_puts(m, ",noacl");
#endif

        if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
                seq_puts(m, ",copyfrom");

        if (fsopt->mds_namespace)
                seq_show_option(m, "mds_namespace", fsopt->mds_namespace);

        if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
                seq_show_option(m, "recover_session", "clean");

        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%d", fsopt->wsize);
        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
                seq_printf(m, ",rsize=%d", fsopt->rsize);
        if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
                seq_printf(m, ",rasize=%d", fsopt->rasize);
        if (fsopt->congestion_kb != default_congestion_kb())
                seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
        if (fsopt->caps_max)
                seq_printf(m, ",caps_max=%d", fsopt->caps_max);
        if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
                seq_printf(m, ",caps_wanted_delay_min=%d",
                           fsopt->caps_wanted_delay_min);
        if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
                seq_printf(m, ",caps_wanted_delay_max=%d",
                           fsopt->caps_wanted_delay_max);
        if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
                seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
        if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
                seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
        if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
                seq_show_option(m, "snapdirname", fsopt->snapdir_name);

        return 0;
}

/*
 * handle any mon messages the standard library doesn't understand.
 * return error if we don't either.
 */
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
{
        struct ceph_fs_client *fsc = client->private;
        int type = le16_to_cpu(msg->hdr.type);

        switch (type) {
        case CEPH_MSG_MDS_MAP:
                ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
                return 0;
        case CEPH_MSG_FS_MAP_USER:
                ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
                return 0;
        default:
                return -1;
        }
}

/*
 * create a new fs client
 *
 * Success or not, this function consumes @fsopt and @opt.
 */
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
                                               struct ceph_options *opt)
{
        struct ceph_fs_client *fsc;
        int page_count;
        size_t size;
        int err;

        fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
        if (!fsc) {
                err = -ENOMEM;
                goto fail;
        }

        fsc->client = ceph_create_client(opt, fsc);
        if (IS_ERR(fsc->client)) {
                err = PTR_ERR(fsc->client);
                goto fail;
        }
        opt = NULL; /* fsc->client now owns this */

        fsc->client->extra_mon_dispatch = extra_mon_dispatch;
        ceph_set_opt(fsc->client, ABORT_ON_FULL);

        if (!fsopt->mds_namespace) {
                ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
                                   0, true);
        } else {
                ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
                                   0, false);
        }

        fsc->mount_options = fsopt;

        fsc->sb = NULL;
        fsc->mount_state = CEPH_MOUNT_MOUNTING;
        fsc->filp_gen = 1;

        atomic_long_set(&fsc->writeback_count, 0);

        err = -ENOMEM;
        /*
         * The number of concurrent works can be high but they don't need
         * to be processed in parallel, limit concurrency.
         */
        fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
        if (!fsc->inode_wq)
                goto fail_client;
        fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
        if (!fsc->cap_wq)
                goto fail_inode_wq;

        /* set up mempools */
        err = -ENOMEM;
        page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
        size = sizeof (struct page *) * (page_count ? page_count : 1);
        fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
        if (!fsc->wb_pagevec_pool)
                goto fail_cap_wq;

        return fsc;

fail_cap_wq:
        destroy_workqueue(fsc->cap_wq);
fail_inode_wq:
        destroy_workqueue(fsc->inode_wq);
fail_client:
        ceph_destroy_client(fsc->client);
fail:
        kfree(fsc);
        if (opt)
                ceph_destroy_options(opt);
        destroy_mount_options(fsopt);
        return ERR_PTR(err);
}

static void flush_fs_workqueues(struct ceph_fs_client *fsc)
{
        flush_workqueue(fsc->inode_wq);
        flush_workqueue(fsc->cap_wq);
}

static void destroy_fs_client(struct ceph_fs_client *fsc)
{
        dout("destroy_fs_client %p\n", fsc);

        ceph_mdsc_destroy(fsc);
        destroy_workqueue(fsc->inode_wq);
        destroy_workqueue(fsc->cap_wq);

        mempool_destroy(fsc->wb_pagevec_pool);

        destroy_mount_options(fsc->mount_options);

        ceph_destroy_client(fsc->client);

        kfree(fsc);
        dout("destroy_fs_client %p done\n", fsc);
}

/*
 * caches
 */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_cap_flush_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
struct kmem_cache *ceph_dir_file_cachep;

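/*
 * Slab constructor: runs once when an object's backing memory is first
 * set up for the cache, not on every allocation, so only one-time VFS
 * inode initialization belongs here.
 */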
static void ceph_inode_init_once(void *foo)
{
        struct ceph_inode_info *ci = foo;
        inode_init_once(&ci->vfs_inode);
}

static int __init init_caches(void)
{
        int error = -ENOMEM;

        ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
                                sizeof(struct ceph_inode_info),
                                __alignof__(struct ceph_inode_info),
                                SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
                                SLAB_ACCOUNT, ceph_inode_init_once);
        if (!ceph_inode_cachep)
                return -ENOMEM;

        ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
        if (!ceph_cap_cachep)
                goto bad_cap;
        ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
                                SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
        if (!ceph_cap_flush_cachep)
                goto bad_cap_flush;

        ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
                                SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
        if (!ceph_dentry_cachep)
                goto bad_dentry;

        ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
        if (!ceph_file_cachep)
                goto bad_file;

        ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
        if (!ceph_dir_file_cachep)
                goto bad_dir_file;

        error = ceph_fscache_register();
        if (error)
                goto bad_fscache;

        return 0;

bad_fscache:
        kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
        kmem_cache_destroy(ceph_file_cachep);
bad_file:
        kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
        kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
        kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
        kmem_cache_destroy(ceph_inode_cachep);
        return error;
}

static void destroy_caches(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();

        kmem_cache_destroy(ceph_inode_cachep);
        kmem_cache_destroy(ceph_cap_cachep);
        kmem_cache_destroy(ceph_cap_flush_cachep);
        kmem_cache_destroy(ceph_dentry_cachep);
        kmem_cache_destroy(ceph_file_cachep);
        kmem_cache_destroy(ceph_dir_file_cachep);

        ceph_fscache_unregister();
}


/*
 * ceph_umount_begin - initiate forced umount.  Tear down the
 * mount, skipping steps that may hang while waiting for server(s).
 */
static void ceph_umount_begin(struct super_block *sb)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

        dout("ceph_umount_begin - starting forced umount\n");
        if (!fsc)
                return;
        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
        ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
        ceph_mdsc_force_umount(fsc->mdsc);
        fsc->filp_gen++; // invalidate open files
}

static const struct super_operations ceph_super_ops = {
        .alloc_inode = ceph_alloc_inode,
        .free_inode = ceph_free_inode,
        .write_inode = ceph_write_inode,
        .drop_inode = generic_delete_inode,
        .evict_inode = ceph_evict_inode,
        .sync_fs = ceph_sync_fs,
        .put_super = ceph_put_super,
        .show_options = ceph_show_options,
        .statfs = ceph_statfs,
        .umount_begin = ceph_umount_begin,
};

/*
 * Bootstrap mount by opening the root directory.  Note the mount
 * @started time from caller, and time out if this takes too long.
 */
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
                                       const char *path,
                                       unsigned long started)
{
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req = NULL;
        int err;
        struct dentry *root;

        /* open dir */
        dout("open_root_inode opening '%s'\n", path);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
        if (IS_ERR(req))
                return ERR_CAST(req);
        req->r_path1 = kstrdup(path, GFP_NOFS);
        if (!req->r_path1) {
                root = ERR_PTR(-ENOMEM);
                goto out;
        }

        req->r_ino1.ino = CEPH_INO_ROOT;
        req->r_ino1.snap = CEPH_NOSNAP;
        req->r_started = started;
        req->r_timeout = fsc->client->options->mount_timeout;
        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        if (err == 0) {
                struct inode *inode = req->r_target_inode;
                req->r_target_inode = NULL;
                dout("open_root_inode success\n");
                root = d_make_root(inode);
                if (!root) {
                        root = ERR_PTR(-ENOMEM);
                        goto out;
                }
                dout("open_root_inode success, root dentry is %p\n", root);
        } else {
                root = ERR_PTR(err);
        }
out:
        ceph_mdsc_put_request(req);
        return root;
}



/*
 * mount: join the ceph cluster, and open root directory.
 */
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
                                      struct fs_context *fc)
{
        int err;
        unsigned long started = jiffies;  /* note the start time */
        struct dentry *root;

        dout("mount start %p\n", fsc);
        mutex_lock(&fsc->client->mount_mutex);

        if (!fsc->sb->s_root) {
                const char *path;
                err = __ceph_open_session(fsc->client, started);
                if (err < 0)
                        goto out;

                /* setup fscache */
                if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
                        err = ceph_fscache_register_fs(fsc, fc);
                        if (err < 0)
                                goto out;
                }

                if (!fsc->mount_options->server_path) {
                        path = "";
                        dout("mount opening path \\t\n");
                } else {
                        path = fsc->mount_options->server_path + 1;
                        dout("mount opening path %s\n", path);
                }

                ceph_fs_debugfs_init(fsc);

                root = open_root_dentry(fsc, path, started);
                if (IS_ERR(root)) {
                        err = PTR_ERR(root);
                        goto out;
                }
                fsc->sb->s_root = dget(root);
        } else {
                root = dget(fsc->sb->s_root);
        }

        fsc->mount_state = CEPH_MOUNT_MOUNTED;
        dout("mount success\n");
        mutex_unlock(&fsc->client->mount_mutex);
        return root;

out:
        mutex_unlock(&fsc->client->mount_mutex);
        return ERR_PTR(err);
}

static int ceph_set_super(struct super_block *s, struct fs_context *fc)
{
        struct ceph_fs_client *fsc = s->s_fs_info;
        int ret;

        dout("set_super %p\n", s);

        s->s_maxbytes = MAX_LFS_FILESIZE;

        s->s_xattr = ceph_xattr_handlers;
        fsc->sb = s;
        fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */

        s->s_op = &ceph_super_ops;
        s->s_d_op = &ceph_dentry_ops;
        s->s_export_op = &ceph_export_ops;

        s->s_time_gran = 1;
        s->s_time_min = 0;
        s->s_time_max = U32_MAX;

        ret = set_anon_super_fc(s, fc);
        if (ret != 0)
                fsc->sb = NULL;
        return ret;
}

/*
 * share superblock if same fs AND options
 */
static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
{
        struct ceph_fs_client *new = fc->s_fs_info;
        struct ceph_mount_options *fsopt = new->mount_options;
        struct ceph_options *opt = new->client->options;
        struct ceph_fs_client *other = ceph_sb_to_client(sb);

        dout("ceph_compare_super %p\n", sb);

        if (compare_mount_options(fsopt, opt, other)) {
                dout("monitor(s)/mount options don't match\n");
                return 0;
        }
        if ((opt->flags & CEPH_OPT_FSID) &&
            ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
                dout("fsid doesn't match\n");
                return 0;
        }
        if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
                dout("flags differ\n");
                return 0;
        }
        return 1;
}

/*
 * construct our own bdi so we can control readahead, etc.
 */
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
{
        int err;

        err = super_setup_bdi_name(sb, "ceph-%ld",
                                   atomic_long_inc_return(&bdi_seq));
        if (err)
                return err;

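        /*
         * Both values below are byte counts from the mount options shifted
         * down to units of PAGE_SIZE; e.g. with 4 KiB pages, the default
         * 8 MiB rasize (assuming CEPH_RASIZE_DEFAULT is unchanged) gives
         * ra_pages = 2048.
         */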
        /* set ra_pages based on rasize mount option? */
        sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;

        /* set io_pages based on max osd read size */
        sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;

        return 0;
}

static int ceph_get_tree(struct fs_context *fc)
{
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
        struct super_block *sb;
        struct ceph_fs_client *fsc;
        struct dentry *res;
        int (*compare_super)(struct super_block *, struct fs_context *) =
                ceph_compare_super;
        int err;

        dout("ceph_get_tree\n");

        if (!fc->source)
                return invalf(fc, "ceph: No source");

#ifdef CONFIG_CEPH_FS_POSIX_ACL
        fc->sb_flags |= SB_POSIXACL;
#endif

        /* create client (which we may/may not use) */
        fsc = create_fs_client(pctx->opts, pctx->copts);
        pctx->opts = NULL;
        pctx->copts = NULL;
        if (IS_ERR(fsc)) {
                err = PTR_ERR(fsc);
                goto out_final;
        }

        err = ceph_mdsc_init(fsc);
        if (err < 0)
                goto out;

        if (ceph_test_opt(fsc->client, NOSHARE))
                compare_super = NULL;

        fc->s_fs_info = fsc;
        sb = sget_fc(fc, compare_super, ceph_set_super);
        fc->s_fs_info = NULL;
        if (IS_ERR(sb)) {
                err = PTR_ERR(sb);
                goto out;
        }

        if (ceph_sb_to_client(sb) != fsc) {
                destroy_fs_client(fsc);
                fsc = ceph_sb_to_client(sb);
                dout("get_sb got existing client %p\n", fsc);
        } else {
                dout("get_sb using new client %p\n", fsc);
                err = ceph_setup_bdi(sb, fsc);
                if (err < 0)
                        goto out_splat;
        }

        res = ceph_real_mount(fsc, fc);
        if (IS_ERR(res)) {
                err = PTR_ERR(res);
                goto out_splat;
        }
        dout("root %p inode %p ino %llx.%llx\n", res,
             d_inode(res), ceph_vinop(d_inode(res)));
        fc->root = fsc->sb->s_root;
        return 0;

out_splat:
        ceph_mdsc_close_sessions(fsc->mdsc);
        deactivate_locked_super(sb);
        goto out_final;

out:
        destroy_fs_client(fsc);
out_final:
        dout("ceph_get_tree fail %d\n", err);
        return err;
}

static void ceph_free_fc(struct fs_context *fc)
{
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;

        if (pctx) {
                destroy_mount_options(pctx->opts);
                ceph_destroy_options(pctx->copts);
                kfree(pctx);
        }
}

static int ceph_reconfigure_fc(struct fs_context *fc)
{
        sync_filesystem(fc->root->d_sb);
        return 0;
}

static const struct fs_context_operations ceph_context_ops = {
        .free = ceph_free_fc,
        .parse_param = ceph_parse_mount_param,
        .get_tree = ceph_get_tree,
        .reconfigure = ceph_reconfigure_fc,
};

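/*
 * fs_context lifecycle: ceph_init_fs_context() below allocates the parsing
 * context, .parse_param is invoked once per mount option, .get_tree builds
 * (or reuses) the superblock, and .free releases the context when the
 * fs_context is torn down.
 */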
/*
 * Set up the filesystem mount context.
 */
static int ceph_init_fs_context(struct fs_context *fc)
{
        struct ceph_parse_opts_ctx *pctx;
        struct ceph_mount_options *fsopt;

        pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
        if (!pctx)
                return -ENOMEM;

        pctx->copts = ceph_alloc_options();
        if (!pctx->copts)
                goto nomem;

        pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
        if (!pctx->opts)
                goto nomem;

        fsopt = pctx->opts;
        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;

        fsopt->wsize = CEPH_MAX_WRITE_SIZE;
        fsopt->rsize = CEPH_MAX_READ_SIZE;
        fsopt->rasize = CEPH_RASIZE_DEFAULT;
        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
        if (!fsopt->snapdir_name)
                goto nomem;

        fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
        fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
        fsopt->congestion_kb = default_congestion_kb();

        fc->fs_private = pctx;
        fc->ops = &ceph_context_ops;
        return 0;

nomem:
        destroy_mount_options(pctx->opts);
        ceph_destroy_options(pctx->copts);
        kfree(pctx);
        return -ENOMEM;
}

static void ceph_kill_sb(struct super_block *s)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
        dev_t dev = s->s_dev;

        dout("kill_sb %p\n", s);

        ceph_mdsc_pre_umount(fsc->mdsc);
        flush_fs_workqueues(fsc);

        generic_shutdown_super(s);

        fsc->client->extra_mon_dispatch = NULL;
        ceph_fs_debugfs_cleanup(fsc);

        ceph_fscache_unregister_fs(fsc);

        destroy_fs_client(fsc);
        free_anon_bdev(dev);
}

static struct file_system_type ceph_fs_type = {
        .owner = THIS_MODULE,
        .name = "ceph",
        .init_fs_context = ceph_init_fs_context,
        .kill_sb = ceph_kill_sb,
        .fs_flags = FS_RENAME_DOES_D_MOVE,
};
MODULE_ALIAS_FS("ceph");

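/*
 * Re-establish monitor/OSD/MDS connectivity after the client has been
 * blacklisted, e.g. as part of recover_session=clean handling (see the
 * CEPH_MOUNT_OPT_CLEANRECOVER flag above).
 */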
int ceph_force_reconnect(struct super_block *sb)
{
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
        int err = 0;

        ceph_umount_begin(sb);

        /* Make sure all page caches get invalidated.
         * see remove_session_caps_cb() */
        flush_workqueue(fsc->inode_wq);

        /* In case we were blacklisted. This also resets
         * all mon/osd connections */
        ceph_reset_client_addr(fsc->client);

        ceph_osdc_clear_abort_err(&fsc->client->osdc);

        fsc->blacklisted = false;
        fsc->mount_state = CEPH_MOUNT_MOUNTED;

        if (sb->s_root) {
                err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
                                        CEPH_STAT_CAP_INODE, true);
        }
        return err;
}

static int __init init_ceph(void)
{
        int ret = init_caches();
        if (ret)
                goto out;

        ceph_flock_init();
        ret = register_filesystem(&ceph_fs_type);
        if (ret)
                goto out_caches;

        pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);

        return 0;

out_caches:
        destroy_caches();
out:
        return ret;
}

static void __exit exit_ceph(void)
{
        dout("exit_ceph\n");
        unregister_filesystem(&ceph_fs_type);
        destroy_caches();
}

module_init(init_ceph);
module_exit(exit_ceph);

MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");