1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 /* 31 * Ceph superblock operations 32 * 33 * Handle the basics of mounting, unmounting. 34 */ 35 36 /* 37 * super ops 38 */ 39 static void ceph_put_super(struct super_block *s) 40 { 41 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 42 43 dout("put_super\n"); 44 ceph_mdsc_close_sessions(fsc->mdsc); 45 } 46 47 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 48 { 49 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 50 struct ceph_mon_client *monc = &fsc->client->monc; 51 struct ceph_statfs st; 52 u64 fsid; 53 int err; 54 u64 data_pool; 55 56 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 57 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 58 } else { 59 data_pool = CEPH_NOPOOL; 60 } 61 62 dout("statfs\n"); 63 err = ceph_monc_do_statfs(monc, data_pool, &st); 64 if (err < 0) 65 return err; 66 67 /* fill in kstatfs */ 68 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 69 70 /* 71 * express utilization in terms of large blocks to avoid 72 * overflow on 32-bit machines. 73 * 74 * NOTE: for the time being, we make bsize == frsize to humor 75 * not-yet-ancient versions of glibc that are broken. 76 * Someday, we will probably want to report a real block 77 * size... whatever that may mean for a network file system! 78 */ 79 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 buf->f_files = le64_to_cpu(st.num_objects); 95 buf->f_ffree = -1; 96 buf->f_namelen = NAME_MAX; 97 98 /* Must convert the fsid, for consistent values across arches */ 99 mutex_lock(&monc->mutex); 100 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 101 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 102 mutex_unlock(&monc->mutex); 103 104 buf->f_fsid.val[0] = fsid & 0xffffffff; 105 buf->f_fsid.val[1] = fsid >> 32; 106 107 return 0; 108 } 109 110 static int ceph_sync_fs(struct super_block *sb, int wait) 111 { 112 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 113 114 if (!wait) { 115 dout("sync_fs (non-blocking)\n"); 116 ceph_flush_dirty_caps(fsc->mdsc); 117 dout("sync_fs (non-blocking) done\n"); 118 return 0; 119 } 120 121 dout("sync_fs (blocking)\n"); 122 ceph_osdc_sync(&fsc->client->osdc); 123 ceph_mdsc_sync(fsc->mdsc); 124 dout("sync_fs (blocking) done\n"); 125 return 0; 126 } 127 128 /* 129 * mount options 130 */ 131 enum { 132 Opt_wsize, 133 Opt_rsize, 134 Opt_rasize, 135 Opt_caps_wanted_delay_min, 136 Opt_caps_wanted_delay_max, 137 Opt_caps_max, 138 Opt_readdir_max_entries, 139 Opt_readdir_max_bytes, 140 Opt_congestion_kb, 141 /* int args above */ 142 Opt_snapdirname, 143 Opt_mds_namespace, 144 Opt_recover_session, 145 Opt_source, 146 /* string args above */ 147 Opt_dirstat, 148 Opt_rbytes, 149 Opt_asyncreaddir, 150 Opt_dcache, 151 Opt_ino32, 152 Opt_fscache, 153 Opt_poolperm, 154 Opt_require_active_mds, 155 Opt_acl, 156 Opt_quotadf, 157 Opt_copyfrom, 158 }; 159 160 enum ceph_recover_session_mode { 161 ceph_recover_session_no, 162 ceph_recover_session_clean 163 }; 164 165 static const struct constant_table ceph_param_recover[] = { 166 { "no", ceph_recover_session_no }, 167 { "clean", ceph_recover_session_clean }, 168 {} 169 }; 170 171 static const struct fs_parameter_spec ceph_mount_parameters[] = { 172 fsparam_flag_no ("acl", Opt_acl), 173 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 174 fsparam_s32 ("caps_max", Opt_caps_max), 175 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 176 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 177 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 178 fsparam_flag_no ("copyfrom", Opt_copyfrom), 179 fsparam_flag_no ("dcache", Opt_dcache), 180 fsparam_flag_no ("dirstat", Opt_dirstat), 181 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 182 fsparam_string ("fsc", Opt_fscache), // fsc=... 183 fsparam_flag_no ("ino32", Opt_ino32), 184 fsparam_string ("mds_namespace", Opt_mds_namespace), 185 fsparam_flag_no ("poolperm", Opt_poolperm), 186 fsparam_flag_no ("quotadf", Opt_quotadf), 187 fsparam_u32 ("rasize", Opt_rasize), 188 fsparam_flag_no ("rbytes", Opt_rbytes), 189 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 190 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 191 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 192 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 193 fsparam_u32 ("rsize", Opt_rsize), 194 fsparam_string ("snapdirname", Opt_snapdirname), 195 fsparam_string ("source", Opt_source), 196 fsparam_u32 ("wsize", Opt_wsize), 197 {} 198 }; 199 200 struct ceph_parse_opts_ctx { 201 struct ceph_options *copts; 202 struct ceph_mount_options *opts; 203 }; 204 205 /* 206 * Remove adjacent slashes and then the trailing slash, unless it is 207 * the only remaining character. 208 * 209 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 210 */ 211 static void canonicalize_path(char *path) 212 { 213 int i, j = 0; 214 215 for (i = 0; path[i] != '\0'; i++) { 216 if (path[i] != '/' || j < 1 || path[j - 1] != '/') 217 path[j++] = path[i]; 218 } 219 220 if (j > 1 && path[j - 1] == '/') 221 j--; 222 path[j] = '\0'; 223 } 224 225 /* 226 * Parse the source parameter. Distinguish the server list from the path. 227 * 228 * The source will look like: 229 * <server_spec>[,<server_spec>...]:[<path>] 230 * where 231 * <server_spec> is <ip>[:<port>] 232 * <path> is optional, but if present must begin with '/' 233 */ 234 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 235 { 236 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 237 struct ceph_mount_options *fsopt = pctx->opts; 238 char *dev_name = param->string, *dev_name_end; 239 int ret; 240 241 dout("%s '%s'\n", __func__, dev_name); 242 if (!dev_name || !*dev_name) 243 return invalfc(fc, "Empty source"); 244 245 dev_name_end = strchr(dev_name, '/'); 246 if (dev_name_end) { 247 /* 248 * The server_path will include the whole chars from userland 249 * including the leading '/'. 250 */ 251 kfree(fsopt->server_path); 252 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 253 if (!fsopt->server_path) 254 return -ENOMEM; 255 256 canonicalize_path(fsopt->server_path); 257 } else { 258 dev_name_end = dev_name + strlen(dev_name); 259 } 260 261 dev_name_end--; /* back up to ':' separator */ 262 if (dev_name_end < dev_name || *dev_name_end != ':') 263 return invalfc(fc, "No path or : separator in source"); 264 265 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 266 if (fsopt->server_path) 267 dout("server path '%s'\n", fsopt->server_path); 268 269 ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, 270 pctx->copts, fc->log.log); 271 if (ret) 272 return ret; 273 274 fc->source = param->string; 275 param->string = NULL; 276 return 0; 277 } 278 279 static int ceph_parse_mount_param(struct fs_context *fc, 280 struct fs_parameter *param) 281 { 282 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 283 struct ceph_mount_options *fsopt = pctx->opts; 284 struct fs_parse_result result; 285 unsigned int mode; 286 int token, ret; 287 288 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 289 if (ret != -ENOPARAM) 290 return ret; 291 292 token = fs_parse(fc, ceph_mount_parameters, param, &result); 293 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 294 if (token < 0) 295 return token; 296 297 switch (token) { 298 case Opt_snapdirname: 299 kfree(fsopt->snapdir_name); 300 fsopt->snapdir_name = param->string; 301 param->string = NULL; 302 break; 303 case Opt_mds_namespace: 304 kfree(fsopt->mds_namespace); 305 fsopt->mds_namespace = param->string; 306 param->string = NULL; 307 break; 308 case Opt_recover_session: 309 mode = result.uint_32; 310 if (mode == ceph_recover_session_no) 311 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 312 else if (mode == ceph_recover_session_clean) 313 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 314 else 315 BUG(); 316 break; 317 case Opt_source: 318 if (fc->source) 319 return invalfc(fc, "Multiple sources specified"); 320 return ceph_parse_source(param, fc); 321 case Opt_wsize: 322 if (result.uint_32 < PAGE_SIZE || 323 result.uint_32 > CEPH_MAX_WRITE_SIZE) 324 goto out_of_range; 325 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 326 break; 327 case Opt_rsize: 328 if (result.uint_32 < PAGE_SIZE || 329 result.uint_32 > CEPH_MAX_READ_SIZE) 330 goto out_of_range; 331 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 332 break; 333 case Opt_rasize: 334 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 335 break; 336 case Opt_caps_wanted_delay_min: 337 if (result.uint_32 < 1) 338 goto out_of_range; 339 fsopt->caps_wanted_delay_min = result.uint_32; 340 break; 341 case Opt_caps_wanted_delay_max: 342 if (result.uint_32 < 1) 343 goto out_of_range; 344 fsopt->caps_wanted_delay_max = result.uint_32; 345 break; 346 case Opt_caps_max: 347 if (result.int_32 < 0) 348 goto out_of_range; 349 fsopt->caps_max = result.int_32; 350 break; 351 case Opt_readdir_max_entries: 352 if (result.uint_32 < 1) 353 goto out_of_range; 354 fsopt->max_readdir = result.uint_32; 355 break; 356 case Opt_readdir_max_bytes: 357 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 358 goto out_of_range; 359 fsopt->max_readdir_bytes = result.uint_32; 360 break; 361 case Opt_congestion_kb: 362 if (result.uint_32 < 1024) /* at least 1M */ 363 goto out_of_range; 364 fsopt->congestion_kb = result.uint_32; 365 break; 366 case Opt_dirstat: 367 if (!result.negated) 368 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 369 else 370 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 371 break; 372 case Opt_rbytes: 373 if (!result.negated) 374 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 375 else 376 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 377 break; 378 case Opt_asyncreaddir: 379 if (!result.negated) 380 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 381 else 382 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 383 break; 384 case Opt_dcache: 385 if (!result.negated) 386 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 387 else 388 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 389 break; 390 case Opt_ino32: 391 if (!result.negated) 392 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 393 else 394 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 395 break; 396 397 case Opt_fscache: 398 #ifdef CONFIG_CEPH_FSCACHE 399 kfree(fsopt->fscache_uniq); 400 fsopt->fscache_uniq = NULL; 401 if (result.negated) { 402 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 403 } else { 404 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 405 fsopt->fscache_uniq = param->string; 406 param->string = NULL; 407 } 408 break; 409 #else 410 return invalfc(fc, "fscache support is disabled"); 411 #endif 412 case Opt_poolperm: 413 if (!result.negated) 414 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 415 else 416 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 417 break; 418 case Opt_require_active_mds: 419 if (!result.negated) 420 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 421 else 422 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 423 break; 424 case Opt_quotadf: 425 if (!result.negated) 426 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 427 else 428 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 429 break; 430 case Opt_copyfrom: 431 if (!result.negated) 432 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 433 else 434 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 435 break; 436 case Opt_acl: 437 if (!result.negated) { 438 #ifdef CONFIG_CEPH_FS_POSIX_ACL 439 fc->sb_flags |= SB_POSIXACL; 440 #else 441 return invalfc(fc, "POSIX ACL support is disabled"); 442 #endif 443 } else { 444 fc->sb_flags &= ~SB_POSIXACL; 445 } 446 break; 447 default: 448 BUG(); 449 } 450 return 0; 451 452 out_of_range: 453 return invalfc(fc, "%s out of range", param->key); 454 } 455 456 static void destroy_mount_options(struct ceph_mount_options *args) 457 { 458 dout("destroy_mount_options %p\n", args); 459 if (!args) 460 return; 461 462 kfree(args->snapdir_name); 463 kfree(args->mds_namespace); 464 kfree(args->server_path); 465 kfree(args->fscache_uniq); 466 kfree(args); 467 } 468 469 static int strcmp_null(const char *s1, const char *s2) 470 { 471 if (!s1 && !s2) 472 return 0; 473 if (s1 && !s2) 474 return -1; 475 if (!s1 && s2) 476 return 1; 477 return strcmp(s1, s2); 478 } 479 480 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 481 struct ceph_options *new_opt, 482 struct ceph_fs_client *fsc) 483 { 484 struct ceph_mount_options *fsopt1 = new_fsopt; 485 struct ceph_mount_options *fsopt2 = fsc->mount_options; 486 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 487 int ret; 488 489 ret = memcmp(fsopt1, fsopt2, ofs); 490 if (ret) 491 return ret; 492 493 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 494 if (ret) 495 return ret; 496 497 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 498 if (ret) 499 return ret; 500 501 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 502 if (ret) 503 return ret; 504 505 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 506 if (ret) 507 return ret; 508 509 return ceph_compare_options(new_opt, fsc->client); 510 } 511 512 /** 513 * ceph_show_options - Show mount options in /proc/mounts 514 * @m: seq_file to write to 515 * @root: root of that (sub)tree 516 */ 517 static int ceph_show_options(struct seq_file *m, struct dentry *root) 518 { 519 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 520 struct ceph_mount_options *fsopt = fsc->mount_options; 521 size_t pos; 522 int ret; 523 524 /* a comma between MNT/MS and client options */ 525 seq_putc(m, ','); 526 pos = m->count; 527 528 ret = ceph_print_client_options(m, fsc->client, false); 529 if (ret) 530 return ret; 531 532 /* retract our comma if no client options */ 533 if (m->count == pos) 534 m->count--; 535 536 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 537 seq_puts(m, ",dirstat"); 538 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 539 seq_puts(m, ",rbytes"); 540 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 541 seq_puts(m, ",noasyncreaddir"); 542 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 543 seq_puts(m, ",nodcache"); 544 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 545 seq_puts(m, ",ino32"); 546 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 547 seq_show_option(m, "fsc", fsopt->fscache_uniq); 548 } 549 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 550 seq_puts(m, ",nopoolperm"); 551 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 552 seq_puts(m, ",noquotadf"); 553 554 #ifdef CONFIG_CEPH_FS_POSIX_ACL 555 if (root->d_sb->s_flags & SB_POSIXACL) 556 seq_puts(m, ",acl"); 557 else 558 seq_puts(m, ",noacl"); 559 #endif 560 561 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 562 seq_puts(m, ",copyfrom"); 563 564 if (fsopt->mds_namespace) 565 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 566 567 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 568 seq_show_option(m, "recover_session", "clean"); 569 570 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 571 seq_printf(m, ",wsize=%u", fsopt->wsize); 572 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 573 seq_printf(m, ",rsize=%u", fsopt->rsize); 574 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 575 seq_printf(m, ",rasize=%u", fsopt->rasize); 576 if (fsopt->congestion_kb != default_congestion_kb()) 577 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 578 if (fsopt->caps_max) 579 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 580 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 581 seq_printf(m, ",caps_wanted_delay_min=%u", 582 fsopt->caps_wanted_delay_min); 583 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 584 seq_printf(m, ",caps_wanted_delay_max=%u", 585 fsopt->caps_wanted_delay_max); 586 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 587 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 588 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 589 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 590 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 591 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 592 593 return 0; 594 } 595 596 /* 597 * handle any mon messages the standard library doesn't understand. 598 * return error if we don't either. 599 */ 600 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 601 { 602 struct ceph_fs_client *fsc = client->private; 603 int type = le16_to_cpu(msg->hdr.type); 604 605 switch (type) { 606 case CEPH_MSG_MDS_MAP: 607 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 608 return 0; 609 case CEPH_MSG_FS_MAP_USER: 610 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 611 return 0; 612 default: 613 return -1; 614 } 615 } 616 617 /* 618 * create a new fs client 619 * 620 * Success or not, this function consumes @fsopt and @opt. 621 */ 622 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 623 struct ceph_options *opt) 624 { 625 struct ceph_fs_client *fsc; 626 int page_count; 627 size_t size; 628 int err; 629 630 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 631 if (!fsc) { 632 err = -ENOMEM; 633 goto fail; 634 } 635 636 fsc->client = ceph_create_client(opt, fsc); 637 if (IS_ERR(fsc->client)) { 638 err = PTR_ERR(fsc->client); 639 goto fail; 640 } 641 opt = NULL; /* fsc->client now owns this */ 642 643 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 644 ceph_set_opt(fsc->client, ABORT_ON_FULL); 645 646 if (!fsopt->mds_namespace) { 647 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 648 0, true); 649 } else { 650 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 651 0, false); 652 } 653 654 fsc->mount_options = fsopt; 655 656 fsc->sb = NULL; 657 fsc->mount_state = CEPH_MOUNT_MOUNTING; 658 fsc->filp_gen = 1; 659 fsc->have_copy_from2 = true; 660 661 atomic_long_set(&fsc->writeback_count, 0); 662 663 err = -ENOMEM; 664 /* 665 * The number of concurrent works can be high but they don't need 666 * to be processed in parallel, limit concurrency. 667 */ 668 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 669 if (!fsc->inode_wq) 670 goto fail_client; 671 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 672 if (!fsc->cap_wq) 673 goto fail_inode_wq; 674 675 /* set up mempools */ 676 err = -ENOMEM; 677 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 678 size = sizeof (struct page *) * (page_count ? page_count : 1); 679 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 680 if (!fsc->wb_pagevec_pool) 681 goto fail_cap_wq; 682 683 return fsc; 684 685 fail_cap_wq: 686 destroy_workqueue(fsc->cap_wq); 687 fail_inode_wq: 688 destroy_workqueue(fsc->inode_wq); 689 fail_client: 690 ceph_destroy_client(fsc->client); 691 fail: 692 kfree(fsc); 693 if (opt) 694 ceph_destroy_options(opt); 695 destroy_mount_options(fsopt); 696 return ERR_PTR(err); 697 } 698 699 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 700 { 701 flush_workqueue(fsc->inode_wq); 702 flush_workqueue(fsc->cap_wq); 703 } 704 705 static void destroy_fs_client(struct ceph_fs_client *fsc) 706 { 707 dout("destroy_fs_client %p\n", fsc); 708 709 ceph_mdsc_destroy(fsc); 710 destroy_workqueue(fsc->inode_wq); 711 destroy_workqueue(fsc->cap_wq); 712 713 mempool_destroy(fsc->wb_pagevec_pool); 714 715 destroy_mount_options(fsc->mount_options); 716 717 ceph_destroy_client(fsc->client); 718 719 kfree(fsc); 720 dout("destroy_fs_client %p done\n", fsc); 721 } 722 723 /* 724 * caches 725 */ 726 struct kmem_cache *ceph_inode_cachep; 727 struct kmem_cache *ceph_cap_cachep; 728 struct kmem_cache *ceph_cap_flush_cachep; 729 struct kmem_cache *ceph_dentry_cachep; 730 struct kmem_cache *ceph_file_cachep; 731 struct kmem_cache *ceph_dir_file_cachep; 732 733 static void ceph_inode_init_once(void *foo) 734 { 735 struct ceph_inode_info *ci = foo; 736 inode_init_once(&ci->vfs_inode); 737 } 738 739 static int __init init_caches(void) 740 { 741 int error = -ENOMEM; 742 743 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 744 sizeof(struct ceph_inode_info), 745 __alignof__(struct ceph_inode_info), 746 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 747 SLAB_ACCOUNT, ceph_inode_init_once); 748 if (!ceph_inode_cachep) 749 return -ENOMEM; 750 751 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 752 if (!ceph_cap_cachep) 753 goto bad_cap; 754 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 755 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 756 if (!ceph_cap_flush_cachep) 757 goto bad_cap_flush; 758 759 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 760 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 761 if (!ceph_dentry_cachep) 762 goto bad_dentry; 763 764 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 765 if (!ceph_file_cachep) 766 goto bad_file; 767 768 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 769 if (!ceph_dir_file_cachep) 770 goto bad_dir_file; 771 772 error = ceph_fscache_register(); 773 if (error) 774 goto bad_fscache; 775 776 return 0; 777 778 bad_fscache: 779 kmem_cache_destroy(ceph_dir_file_cachep); 780 bad_dir_file: 781 kmem_cache_destroy(ceph_file_cachep); 782 bad_file: 783 kmem_cache_destroy(ceph_dentry_cachep); 784 bad_dentry: 785 kmem_cache_destroy(ceph_cap_flush_cachep); 786 bad_cap_flush: 787 kmem_cache_destroy(ceph_cap_cachep); 788 bad_cap: 789 kmem_cache_destroy(ceph_inode_cachep); 790 return error; 791 } 792 793 static void destroy_caches(void) 794 { 795 /* 796 * Make sure all delayed rcu free inodes are flushed before we 797 * destroy cache. 798 */ 799 rcu_barrier(); 800 801 kmem_cache_destroy(ceph_inode_cachep); 802 kmem_cache_destroy(ceph_cap_cachep); 803 kmem_cache_destroy(ceph_cap_flush_cachep); 804 kmem_cache_destroy(ceph_dentry_cachep); 805 kmem_cache_destroy(ceph_file_cachep); 806 kmem_cache_destroy(ceph_dir_file_cachep); 807 808 ceph_fscache_unregister(); 809 } 810 811 /* 812 * ceph_umount_begin - initiate forced umount. Tear down down the 813 * mount, skipping steps that may hang while waiting for server(s). 814 */ 815 static void ceph_umount_begin(struct super_block *sb) 816 { 817 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 818 819 dout("ceph_umount_begin - starting forced umount\n"); 820 if (!fsc) 821 return; 822 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 823 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 824 ceph_mdsc_force_umount(fsc->mdsc); 825 fsc->filp_gen++; // invalidate open files 826 } 827 828 static const struct super_operations ceph_super_ops = { 829 .alloc_inode = ceph_alloc_inode, 830 .free_inode = ceph_free_inode, 831 .write_inode = ceph_write_inode, 832 .drop_inode = generic_delete_inode, 833 .evict_inode = ceph_evict_inode, 834 .sync_fs = ceph_sync_fs, 835 .put_super = ceph_put_super, 836 .show_options = ceph_show_options, 837 .statfs = ceph_statfs, 838 .umount_begin = ceph_umount_begin, 839 }; 840 841 /* 842 * Bootstrap mount by opening the root directory. Note the mount 843 * @started time from caller, and time out if this takes too long. 844 */ 845 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 846 const char *path, 847 unsigned long started) 848 { 849 struct ceph_mds_client *mdsc = fsc->mdsc; 850 struct ceph_mds_request *req = NULL; 851 int err; 852 struct dentry *root; 853 854 /* open dir */ 855 dout("open_root_inode opening '%s'\n", path); 856 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 857 if (IS_ERR(req)) 858 return ERR_CAST(req); 859 req->r_path1 = kstrdup(path, GFP_NOFS); 860 if (!req->r_path1) { 861 root = ERR_PTR(-ENOMEM); 862 goto out; 863 } 864 865 req->r_ino1.ino = CEPH_INO_ROOT; 866 req->r_ino1.snap = CEPH_NOSNAP; 867 req->r_started = started; 868 req->r_timeout = fsc->client->options->mount_timeout; 869 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 870 req->r_num_caps = 2; 871 err = ceph_mdsc_do_request(mdsc, NULL, req); 872 if (err == 0) { 873 struct inode *inode = req->r_target_inode; 874 req->r_target_inode = NULL; 875 dout("open_root_inode success\n"); 876 root = d_make_root(inode); 877 if (!root) { 878 root = ERR_PTR(-ENOMEM); 879 goto out; 880 } 881 dout("open_root_inode success, root dentry is %p\n", root); 882 } else { 883 root = ERR_PTR(err); 884 } 885 out: 886 ceph_mdsc_put_request(req); 887 return root; 888 } 889 890 /* 891 * mount: join the ceph cluster, and open root directory. 892 */ 893 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 894 struct fs_context *fc) 895 { 896 int err; 897 unsigned long started = jiffies; /* note the start time */ 898 struct dentry *root; 899 900 dout("mount start %p\n", fsc); 901 mutex_lock(&fsc->client->mount_mutex); 902 903 if (!fsc->sb->s_root) { 904 const char *path = fsc->mount_options->server_path ? 905 fsc->mount_options->server_path + 1 : ""; 906 907 err = __ceph_open_session(fsc->client, started); 908 if (err < 0) 909 goto out; 910 911 /* setup fscache */ 912 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 913 err = ceph_fscache_register_fs(fsc, fc); 914 if (err < 0) 915 goto out; 916 } 917 918 dout("mount opening path '%s'\n", path); 919 920 ceph_fs_debugfs_init(fsc); 921 922 root = open_root_dentry(fsc, path, started); 923 if (IS_ERR(root)) { 924 err = PTR_ERR(root); 925 goto out; 926 } 927 fsc->sb->s_root = dget(root); 928 } else { 929 root = dget(fsc->sb->s_root); 930 } 931 932 fsc->mount_state = CEPH_MOUNT_MOUNTED; 933 dout("mount success\n"); 934 mutex_unlock(&fsc->client->mount_mutex); 935 return root; 936 937 out: 938 mutex_unlock(&fsc->client->mount_mutex); 939 return ERR_PTR(err); 940 } 941 942 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 943 { 944 struct ceph_fs_client *fsc = s->s_fs_info; 945 int ret; 946 947 dout("set_super %p\n", s); 948 949 s->s_maxbytes = MAX_LFS_FILESIZE; 950 951 s->s_xattr = ceph_xattr_handlers; 952 fsc->sb = s; 953 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 954 955 s->s_op = &ceph_super_ops; 956 s->s_d_op = &ceph_dentry_ops; 957 s->s_export_op = &ceph_export_ops; 958 959 s->s_time_gran = 1; 960 s->s_time_min = 0; 961 s->s_time_max = U32_MAX; 962 963 ret = set_anon_super_fc(s, fc); 964 if (ret != 0) 965 fsc->sb = NULL; 966 return ret; 967 } 968 969 /* 970 * share superblock if same fs AND options 971 */ 972 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 973 { 974 struct ceph_fs_client *new = fc->s_fs_info; 975 struct ceph_mount_options *fsopt = new->mount_options; 976 struct ceph_options *opt = new->client->options; 977 struct ceph_fs_client *other = ceph_sb_to_client(sb); 978 979 dout("ceph_compare_super %p\n", sb); 980 981 if (compare_mount_options(fsopt, opt, other)) { 982 dout("monitor(s)/mount options don't match\n"); 983 return 0; 984 } 985 if ((opt->flags & CEPH_OPT_FSID) && 986 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 987 dout("fsid doesn't match\n"); 988 return 0; 989 } 990 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 991 dout("flags differ\n"); 992 return 0; 993 } 994 return 1; 995 } 996 997 /* 998 * construct our own bdi so we can control readahead, etc. 999 */ 1000 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1001 1002 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1003 { 1004 int err; 1005 1006 err = super_setup_bdi_name(sb, "ceph-%ld", 1007 atomic_long_inc_return(&bdi_seq)); 1008 if (err) 1009 return err; 1010 1011 /* set ra_pages based on rasize mount option? */ 1012 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1013 1014 /* set io_pages based on max osd read size */ 1015 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1016 1017 return 0; 1018 } 1019 1020 static int ceph_get_tree(struct fs_context *fc) 1021 { 1022 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1023 struct super_block *sb; 1024 struct ceph_fs_client *fsc; 1025 struct dentry *res; 1026 int (*compare_super)(struct super_block *, struct fs_context *) = 1027 ceph_compare_super; 1028 int err; 1029 1030 dout("ceph_get_tree\n"); 1031 1032 if (!fc->source) 1033 return invalfc(fc, "No source"); 1034 1035 /* create client (which we may/may not use) */ 1036 fsc = create_fs_client(pctx->opts, pctx->copts); 1037 pctx->opts = NULL; 1038 pctx->copts = NULL; 1039 if (IS_ERR(fsc)) { 1040 err = PTR_ERR(fsc); 1041 goto out_final; 1042 } 1043 1044 err = ceph_mdsc_init(fsc); 1045 if (err < 0) 1046 goto out; 1047 1048 if (ceph_test_opt(fsc->client, NOSHARE)) 1049 compare_super = NULL; 1050 1051 fc->s_fs_info = fsc; 1052 sb = sget_fc(fc, compare_super, ceph_set_super); 1053 fc->s_fs_info = NULL; 1054 if (IS_ERR(sb)) { 1055 err = PTR_ERR(sb); 1056 goto out; 1057 } 1058 1059 if (ceph_sb_to_client(sb) != fsc) { 1060 destroy_fs_client(fsc); 1061 fsc = ceph_sb_to_client(sb); 1062 dout("get_sb got existing client %p\n", fsc); 1063 } else { 1064 dout("get_sb using new client %p\n", fsc); 1065 err = ceph_setup_bdi(sb, fsc); 1066 if (err < 0) 1067 goto out_splat; 1068 } 1069 1070 res = ceph_real_mount(fsc, fc); 1071 if (IS_ERR(res)) { 1072 err = PTR_ERR(res); 1073 goto out_splat; 1074 } 1075 dout("root %p inode %p ino %llx.%llx\n", res, 1076 d_inode(res), ceph_vinop(d_inode(res))); 1077 fc->root = fsc->sb->s_root; 1078 return 0; 1079 1080 out_splat: 1081 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1082 pr_info("No mds server is up or the cluster is laggy\n"); 1083 err = -EHOSTUNREACH; 1084 } 1085 1086 ceph_mdsc_close_sessions(fsc->mdsc); 1087 deactivate_locked_super(sb); 1088 goto out_final; 1089 1090 out: 1091 destroy_fs_client(fsc); 1092 out_final: 1093 dout("ceph_get_tree fail %d\n", err); 1094 return err; 1095 } 1096 1097 static void ceph_free_fc(struct fs_context *fc) 1098 { 1099 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1100 1101 if (pctx) { 1102 destroy_mount_options(pctx->opts); 1103 ceph_destroy_options(pctx->copts); 1104 kfree(pctx); 1105 } 1106 } 1107 1108 static int ceph_reconfigure_fc(struct fs_context *fc) 1109 { 1110 sync_filesystem(fc->root->d_sb); 1111 return 0; 1112 } 1113 1114 static const struct fs_context_operations ceph_context_ops = { 1115 .free = ceph_free_fc, 1116 .parse_param = ceph_parse_mount_param, 1117 .get_tree = ceph_get_tree, 1118 .reconfigure = ceph_reconfigure_fc, 1119 }; 1120 1121 /* 1122 * Set up the filesystem mount context. 1123 */ 1124 static int ceph_init_fs_context(struct fs_context *fc) 1125 { 1126 struct ceph_parse_opts_ctx *pctx; 1127 struct ceph_mount_options *fsopt; 1128 1129 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1130 if (!pctx) 1131 return -ENOMEM; 1132 1133 pctx->copts = ceph_alloc_options(); 1134 if (!pctx->copts) 1135 goto nomem; 1136 1137 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1138 if (!pctx->opts) 1139 goto nomem; 1140 1141 fsopt = pctx->opts; 1142 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1143 1144 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1145 fsopt->rsize = CEPH_MAX_READ_SIZE; 1146 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1147 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1148 if (!fsopt->snapdir_name) 1149 goto nomem; 1150 1151 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1152 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1153 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1154 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1155 fsopt->congestion_kb = default_congestion_kb(); 1156 1157 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1158 fc->sb_flags |= SB_POSIXACL; 1159 #endif 1160 1161 fc->fs_private = pctx; 1162 fc->ops = &ceph_context_ops; 1163 return 0; 1164 1165 nomem: 1166 destroy_mount_options(pctx->opts); 1167 ceph_destroy_options(pctx->copts); 1168 kfree(pctx); 1169 return -ENOMEM; 1170 } 1171 1172 static void ceph_kill_sb(struct super_block *s) 1173 { 1174 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1175 dev_t dev = s->s_dev; 1176 1177 dout("kill_sb %p\n", s); 1178 1179 ceph_mdsc_pre_umount(fsc->mdsc); 1180 flush_fs_workqueues(fsc); 1181 1182 generic_shutdown_super(s); 1183 1184 fsc->client->extra_mon_dispatch = NULL; 1185 ceph_fs_debugfs_cleanup(fsc); 1186 1187 ceph_fscache_unregister_fs(fsc); 1188 1189 destroy_fs_client(fsc); 1190 free_anon_bdev(dev); 1191 } 1192 1193 static struct file_system_type ceph_fs_type = { 1194 .owner = THIS_MODULE, 1195 .name = "ceph", 1196 .init_fs_context = ceph_init_fs_context, 1197 .kill_sb = ceph_kill_sb, 1198 .fs_flags = FS_RENAME_DOES_D_MOVE, 1199 }; 1200 MODULE_ALIAS_FS("ceph"); 1201 1202 int ceph_force_reconnect(struct super_block *sb) 1203 { 1204 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1205 int err = 0; 1206 1207 ceph_umount_begin(sb); 1208 1209 /* Make sure all page caches get invalidated. 1210 * see remove_session_caps_cb() */ 1211 flush_workqueue(fsc->inode_wq); 1212 1213 /* In case that we were blacklisted. This also reset 1214 * all mon/osd connections */ 1215 ceph_reset_client_addr(fsc->client); 1216 1217 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1218 1219 fsc->blacklisted = false; 1220 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1221 1222 if (sb->s_root) { 1223 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1224 CEPH_STAT_CAP_INODE, true); 1225 } 1226 return err; 1227 } 1228 1229 static int __init init_ceph(void) 1230 { 1231 int ret = init_caches(); 1232 if (ret) 1233 goto out; 1234 1235 ceph_flock_init(); 1236 ret = register_filesystem(&ceph_fs_type); 1237 if (ret) 1238 goto out_caches; 1239 1240 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1241 1242 return 0; 1243 1244 out_caches: 1245 destroy_caches(); 1246 out: 1247 return ret; 1248 } 1249 1250 static void __exit exit_ceph(void) 1251 { 1252 dout("exit_ceph\n"); 1253 unregister_filesystem(&ceph_fs_type); 1254 destroy_caches(); 1255 } 1256 1257 module_init(init_ceph); 1258 module_exit(exit_ceph); 1259 1260 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1261 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1262 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1263 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1264 MODULE_LICENSE("GPL"); 1265