1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/fs_context.h> 13 #include <linux/fs_parser.h> 14 #include <linux/sched.h> 15 #include <linux/seq_file.h> 16 #include <linux/slab.h> 17 #include <linux/statfs.h> 18 #include <linux/string.h> 19 20 #include "super.h" 21 #include "mds_client.h" 22 #include "cache.h" 23 24 #include <linux/ceph/ceph_features.h> 25 #include <linux/ceph/decode.h> 26 #include <linux/ceph/mon_client.h> 27 #include <linux/ceph/auth.h> 28 #include <linux/ceph/debugfs.h> 29 30 /* 31 * Ceph superblock operations 32 * 33 * Handle the basics of mounting, unmounting. 34 */ 35 36 /* 37 * super ops 38 */ 39 static void ceph_put_super(struct super_block *s) 40 { 41 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 42 43 dout("put_super\n"); 44 ceph_mdsc_close_sessions(fsc->mdsc); 45 } 46 47 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 48 { 49 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 50 struct ceph_mon_client *monc = &fsc->client->monc; 51 struct ceph_statfs st; 52 u64 fsid; 53 int err; 54 u64 data_pool; 55 56 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 57 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 58 } else { 59 data_pool = CEPH_NOPOOL; 60 } 61 62 dout("statfs\n"); 63 err = ceph_monc_do_statfs(monc, data_pool, &st); 64 if (err < 0) 65 return err; 66 67 /* fill in kstatfs */ 68 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 69 70 /* 71 * express utilization in terms of large blocks to avoid 72 * overflow on 32-bit machines. 73 * 74 * NOTE: for the time being, we make bsize == frsize to humor 75 * not-yet-ancient versions of glibc that are broken. 76 * Someday, we will probably want to report a real block 77 * size... whatever that may mean for a network file system! 78 */ 79 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 80 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 81 82 /* 83 * By default use root quota for stats; fallback to overall filesystem 84 * usage if using 'noquotadf' mount option or if the root dir doesn't 85 * have max_bytes quota set. 86 */ 87 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 88 !ceph_quota_update_statfs(fsc, buf)) { 89 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 92 } 93 94 buf->f_files = le64_to_cpu(st.num_objects); 95 buf->f_ffree = -1; 96 buf->f_namelen = NAME_MAX; 97 98 /* Must convert the fsid, for consistent values across arches */ 99 mutex_lock(&monc->mutex); 100 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 101 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 102 mutex_unlock(&monc->mutex); 103 104 buf->f_fsid.val[0] = fsid & 0xffffffff; 105 buf->f_fsid.val[1] = fsid >> 32; 106 107 return 0; 108 } 109 110 static int ceph_sync_fs(struct super_block *sb, int wait) 111 { 112 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 113 114 if (!wait) { 115 dout("sync_fs (non-blocking)\n"); 116 ceph_flush_dirty_caps(fsc->mdsc); 117 dout("sync_fs (non-blocking) done\n"); 118 return 0; 119 } 120 121 dout("sync_fs (blocking)\n"); 122 ceph_osdc_sync(&fsc->client->osdc); 123 ceph_mdsc_sync(fsc->mdsc); 124 dout("sync_fs (blocking) done\n"); 125 return 0; 126 } 127 128 /* 129 * mount options 130 */ 131 enum { 132 Opt_wsize, 133 Opt_rsize, 134 Opt_rasize, 135 Opt_caps_wanted_delay_min, 136 Opt_caps_wanted_delay_max, 137 Opt_caps_max, 138 Opt_readdir_max_entries, 139 Opt_readdir_max_bytes, 140 Opt_congestion_kb, 141 /* int args above */ 142 Opt_snapdirname, 143 Opt_mds_namespace, 144 Opt_recover_session, 145 Opt_source, 146 /* string args above */ 147 Opt_dirstat, 148 Opt_rbytes, 149 Opt_asyncreaddir, 150 Opt_dcache, 151 Opt_ino32, 152 Opt_fscache, 153 Opt_poolperm, 154 Opt_require_active_mds, 155 Opt_acl, 156 Opt_quotadf, 157 Opt_copyfrom, 158 }; 159 160 enum ceph_recover_session_mode { 161 ceph_recover_session_no, 162 ceph_recover_session_clean 163 }; 164 165 static const struct constant_table ceph_param_recover[] = { 166 { "no", ceph_recover_session_no }, 167 { "clean", ceph_recover_session_clean }, 168 {} 169 }; 170 171 static const struct fs_parameter_spec ceph_mount_parameters[] = { 172 fsparam_flag_no ("acl", Opt_acl), 173 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), 174 fsparam_s32 ("caps_max", Opt_caps_max), 175 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), 176 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), 177 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), 178 fsparam_flag_no ("copyfrom", Opt_copyfrom), 179 fsparam_flag_no ("dcache", Opt_dcache), 180 fsparam_flag_no ("dirstat", Opt_dirstat), 181 fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc 182 fsparam_string ("fsc", Opt_fscache), // fsc=... 183 fsparam_flag_no ("ino32", Opt_ino32), 184 fsparam_string ("mds_namespace", Opt_mds_namespace), 185 fsparam_flag_no ("poolperm", Opt_poolperm), 186 fsparam_flag_no ("quotadf", Opt_quotadf), 187 fsparam_u32 ("rasize", Opt_rasize), 188 fsparam_flag_no ("rbytes", Opt_rbytes), 189 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), 190 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), 191 fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), 192 fsparam_flag_no ("require_active_mds", Opt_require_active_mds), 193 fsparam_u32 ("rsize", Opt_rsize), 194 fsparam_string ("snapdirname", Opt_snapdirname), 195 fsparam_string ("source", Opt_source), 196 fsparam_u32 ("wsize", Opt_wsize), 197 {} 198 }; 199 200 struct ceph_parse_opts_ctx { 201 struct ceph_options *copts; 202 struct ceph_mount_options *opts; 203 }; 204 205 /* 206 * Parse the source parameter. Distinguish the server list from the path. 207 * 208 * The source will look like: 209 * <server_spec>[,<server_spec>...]:[<path>] 210 * where 211 * <server_spec> is <ip>[:<port>] 212 * <path> is optional, but if present must begin with '/' 213 */ 214 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) 215 { 216 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 217 struct ceph_mount_options *fsopt = pctx->opts; 218 char *dev_name = param->string, *dev_name_end; 219 int ret; 220 221 dout("%s '%s'\n", __func__, dev_name); 222 if (!dev_name || !*dev_name) 223 return invalfc(fc, "Empty source"); 224 225 dev_name_end = strchr(dev_name, '/'); 226 if (dev_name_end) { 227 kfree(fsopt->server_path); 228 229 /* 230 * The server_path will include the whole chars from userland 231 * including the leading '/'. 232 */ 233 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 234 if (!fsopt->server_path) 235 return -ENOMEM; 236 } else { 237 dev_name_end = dev_name + strlen(dev_name); 238 } 239 240 dev_name_end--; /* back up to ':' separator */ 241 if (dev_name_end < dev_name || *dev_name_end != ':') 242 return invalfc(fc, "No path or : separator in source"); 243 244 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 245 if (fsopt->server_path) 246 dout("server path '%s'\n", fsopt->server_path); 247 248 ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, 249 pctx->copts, fc->log.log); 250 if (ret) 251 return ret; 252 253 fc->source = param->string; 254 param->string = NULL; 255 return 0; 256 } 257 258 static int ceph_parse_mount_param(struct fs_context *fc, 259 struct fs_parameter *param) 260 { 261 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 262 struct ceph_mount_options *fsopt = pctx->opts; 263 struct fs_parse_result result; 264 unsigned int mode; 265 int token, ret; 266 267 ret = ceph_parse_param(param, pctx->copts, fc->log.log); 268 if (ret != -ENOPARAM) 269 return ret; 270 271 token = fs_parse(fc, ceph_mount_parameters, param, &result); 272 dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 273 if (token < 0) 274 return token; 275 276 switch (token) { 277 case Opt_snapdirname: 278 kfree(fsopt->snapdir_name); 279 fsopt->snapdir_name = param->string; 280 param->string = NULL; 281 break; 282 case Opt_mds_namespace: 283 kfree(fsopt->mds_namespace); 284 fsopt->mds_namespace = param->string; 285 param->string = NULL; 286 break; 287 case Opt_recover_session: 288 mode = result.uint_32; 289 if (mode == ceph_recover_session_no) 290 fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; 291 else if (mode == ceph_recover_session_clean) 292 fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; 293 else 294 BUG(); 295 break; 296 case Opt_source: 297 if (fc->source) 298 return invalfc(fc, "Multiple sources specified"); 299 return ceph_parse_source(param, fc); 300 case Opt_wsize: 301 if (result.uint_32 < PAGE_SIZE || 302 result.uint_32 > CEPH_MAX_WRITE_SIZE) 303 goto out_of_range; 304 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); 305 break; 306 case Opt_rsize: 307 if (result.uint_32 < PAGE_SIZE || 308 result.uint_32 > CEPH_MAX_READ_SIZE) 309 goto out_of_range; 310 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); 311 break; 312 case Opt_rasize: 313 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); 314 break; 315 case Opt_caps_wanted_delay_min: 316 if (result.uint_32 < 1) 317 goto out_of_range; 318 fsopt->caps_wanted_delay_min = result.uint_32; 319 break; 320 case Opt_caps_wanted_delay_max: 321 if (result.uint_32 < 1) 322 goto out_of_range; 323 fsopt->caps_wanted_delay_max = result.uint_32; 324 break; 325 case Opt_caps_max: 326 if (result.int_32 < 0) 327 goto out_of_range; 328 fsopt->caps_max = result.int_32; 329 break; 330 case Opt_readdir_max_entries: 331 if (result.uint_32 < 1) 332 goto out_of_range; 333 fsopt->max_readdir = result.uint_32; 334 break; 335 case Opt_readdir_max_bytes: 336 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) 337 goto out_of_range; 338 fsopt->max_readdir_bytes = result.uint_32; 339 break; 340 case Opt_congestion_kb: 341 if (result.uint_32 < 1024) /* at least 1M */ 342 goto out_of_range; 343 fsopt->congestion_kb = result.uint_32; 344 break; 345 case Opt_dirstat: 346 if (!result.negated) 347 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 348 else 349 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 350 break; 351 case Opt_rbytes: 352 if (!result.negated) 353 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 354 else 355 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 356 break; 357 case Opt_asyncreaddir: 358 if (!result.negated) 359 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 360 else 361 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 362 break; 363 case Opt_dcache: 364 if (!result.negated) 365 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 366 else 367 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 368 break; 369 case Opt_ino32: 370 if (!result.negated) 371 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 372 else 373 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 374 break; 375 376 case Opt_fscache: 377 #ifdef CONFIG_CEPH_FSCACHE 378 kfree(fsopt->fscache_uniq); 379 fsopt->fscache_uniq = NULL; 380 if (result.negated) { 381 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 382 } else { 383 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 384 fsopt->fscache_uniq = param->string; 385 param->string = NULL; 386 } 387 break; 388 #else 389 return invalfc(fc, "fscache support is disabled"); 390 #endif 391 case Opt_poolperm: 392 if (!result.negated) 393 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 394 else 395 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 396 break; 397 case Opt_require_active_mds: 398 if (!result.negated) 399 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 400 else 401 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 402 break; 403 case Opt_quotadf: 404 if (!result.negated) 405 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 406 else 407 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 408 break; 409 case Opt_copyfrom: 410 if (!result.negated) 411 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 412 else 413 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 414 break; 415 case Opt_acl: 416 if (!result.negated) { 417 #ifdef CONFIG_CEPH_FS_POSIX_ACL 418 fc->sb_flags |= SB_POSIXACL; 419 #else 420 return invalfc(fc, "POSIX ACL support is disabled"); 421 #endif 422 } else { 423 fc->sb_flags &= ~SB_POSIXACL; 424 } 425 break; 426 default: 427 BUG(); 428 } 429 return 0; 430 431 out_of_range: 432 return invalfc(fc, "%s out of range", param->key); 433 } 434 435 static void destroy_mount_options(struct ceph_mount_options *args) 436 { 437 dout("destroy_mount_options %p\n", args); 438 if (!args) 439 return; 440 441 kfree(args->snapdir_name); 442 kfree(args->mds_namespace); 443 kfree(args->server_path); 444 kfree(args->fscache_uniq); 445 kfree(args); 446 } 447 448 static int strcmp_null(const char *s1, const char *s2) 449 { 450 if (!s1 && !s2) 451 return 0; 452 if (s1 && !s2) 453 return -1; 454 if (!s1 && s2) 455 return 1; 456 return strcmp(s1, s2); 457 } 458 459 /** 460 * path_remove_extra_slash - Remove the extra slashes in the server path 461 * @server_path: the server path and could be NULL 462 * 463 * Return NULL if the path is NULL or only consists of "/", or a string 464 * without any extra slashes including the leading slash(es) and the 465 * slash(es) at the end of the server path, such as: 466 * "//dir1////dir2///" --> "dir1/dir2" 467 */ 468 static char *path_remove_extra_slash(const char *server_path) 469 { 470 const char *path = server_path; 471 const char *cur, *end; 472 char *buf, *p; 473 int len; 474 475 /* if the server path is omitted */ 476 if (!path) 477 return NULL; 478 479 /* remove all the leading slashes */ 480 while (*path == '/') 481 path++; 482 483 /* if the server path only consists of slashes */ 484 if (*path == '\0') 485 return NULL; 486 487 len = strlen(path); 488 489 buf = kmalloc(len + 1, GFP_KERNEL); 490 if (!buf) 491 return ERR_PTR(-ENOMEM); 492 493 end = path + len; 494 p = buf; 495 do { 496 cur = strchr(path, '/'); 497 if (!cur) 498 cur = end; 499 500 len = cur - path; 501 502 /* including one '/' */ 503 if (cur != end) 504 len += 1; 505 506 memcpy(p, path, len); 507 p += len; 508 509 while (cur <= end && *cur == '/') 510 cur++; 511 path = cur; 512 } while (path < end); 513 514 *p = '\0'; 515 516 /* 517 * remove the last slash if there has and just to make sure that 518 * we will get something like "dir1/dir2" 519 */ 520 if (*(--p) == '/') 521 *p = '\0'; 522 523 return buf; 524 } 525 526 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 527 struct ceph_options *new_opt, 528 struct ceph_fs_client *fsc) 529 { 530 struct ceph_mount_options *fsopt1 = new_fsopt; 531 struct ceph_mount_options *fsopt2 = fsc->mount_options; 532 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 533 char *p1, *p2; 534 int ret; 535 536 ret = memcmp(fsopt1, fsopt2, ofs); 537 if (ret) 538 return ret; 539 540 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 541 if (ret) 542 return ret; 543 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 544 if (ret) 545 return ret; 546 547 p1 = path_remove_extra_slash(fsopt1->server_path); 548 if (IS_ERR(p1)) 549 return PTR_ERR(p1); 550 p2 = path_remove_extra_slash(fsopt2->server_path); 551 if (IS_ERR(p2)) { 552 kfree(p1); 553 return PTR_ERR(p2); 554 } 555 ret = strcmp_null(p1, p2); 556 kfree(p1); 557 kfree(p2); 558 if (ret) 559 return ret; 560 561 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 562 if (ret) 563 return ret; 564 565 return ceph_compare_options(new_opt, fsc->client); 566 } 567 568 /** 569 * ceph_show_options - Show mount options in /proc/mounts 570 * @m: seq_file to write to 571 * @root: root of that (sub)tree 572 */ 573 static int ceph_show_options(struct seq_file *m, struct dentry *root) 574 { 575 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 576 struct ceph_mount_options *fsopt = fsc->mount_options; 577 size_t pos; 578 int ret; 579 580 /* a comma between MNT/MS and client options */ 581 seq_putc(m, ','); 582 pos = m->count; 583 584 ret = ceph_print_client_options(m, fsc->client, false); 585 if (ret) 586 return ret; 587 588 /* retract our comma if no client options */ 589 if (m->count == pos) 590 m->count--; 591 592 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 593 seq_puts(m, ",dirstat"); 594 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 595 seq_puts(m, ",rbytes"); 596 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 597 seq_puts(m, ",noasyncreaddir"); 598 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 599 seq_puts(m, ",nodcache"); 600 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 601 seq_puts(m, ",ino32"); 602 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 603 seq_show_option(m, "fsc", fsopt->fscache_uniq); 604 } 605 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 606 seq_puts(m, ",nopoolperm"); 607 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 608 seq_puts(m, ",noquotadf"); 609 610 #ifdef CONFIG_CEPH_FS_POSIX_ACL 611 if (root->d_sb->s_flags & SB_POSIXACL) 612 seq_puts(m, ",acl"); 613 else 614 seq_puts(m, ",noacl"); 615 #endif 616 617 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 618 seq_puts(m, ",copyfrom"); 619 620 if (fsopt->mds_namespace) 621 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 622 623 if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) 624 seq_show_option(m, "recover_session", "clean"); 625 626 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 627 seq_printf(m, ",wsize=%u", fsopt->wsize); 628 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 629 seq_printf(m, ",rsize=%u", fsopt->rsize); 630 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 631 seq_printf(m, ",rasize=%u", fsopt->rasize); 632 if (fsopt->congestion_kb != default_congestion_kb()) 633 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); 634 if (fsopt->caps_max) 635 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 636 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 637 seq_printf(m, ",caps_wanted_delay_min=%u", 638 fsopt->caps_wanted_delay_min); 639 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 640 seq_printf(m, ",caps_wanted_delay_max=%u", 641 fsopt->caps_wanted_delay_max); 642 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 643 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); 644 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 645 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); 646 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 647 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 648 649 return 0; 650 } 651 652 /* 653 * handle any mon messages the standard library doesn't understand. 654 * return error if we don't either. 655 */ 656 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 657 { 658 struct ceph_fs_client *fsc = client->private; 659 int type = le16_to_cpu(msg->hdr.type); 660 661 switch (type) { 662 case CEPH_MSG_MDS_MAP: 663 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 664 return 0; 665 case CEPH_MSG_FS_MAP_USER: 666 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 667 return 0; 668 default: 669 return -1; 670 } 671 } 672 673 /* 674 * create a new fs client 675 * 676 * Success or not, this function consumes @fsopt and @opt. 677 */ 678 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 679 struct ceph_options *opt) 680 { 681 struct ceph_fs_client *fsc; 682 int page_count; 683 size_t size; 684 int err; 685 686 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 687 if (!fsc) { 688 err = -ENOMEM; 689 goto fail; 690 } 691 692 fsc->client = ceph_create_client(opt, fsc); 693 if (IS_ERR(fsc->client)) { 694 err = PTR_ERR(fsc->client); 695 goto fail; 696 } 697 opt = NULL; /* fsc->client now owns this */ 698 699 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 700 ceph_set_opt(fsc->client, ABORT_ON_FULL); 701 702 if (!fsopt->mds_namespace) { 703 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 704 0, true); 705 } else { 706 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 707 0, false); 708 } 709 710 fsc->mount_options = fsopt; 711 712 fsc->sb = NULL; 713 fsc->mount_state = CEPH_MOUNT_MOUNTING; 714 fsc->filp_gen = 1; 715 fsc->have_copy_from2 = true; 716 717 atomic_long_set(&fsc->writeback_count, 0); 718 719 err = -ENOMEM; 720 /* 721 * The number of concurrent works can be high but they don't need 722 * to be processed in parallel, limit concurrency. 723 */ 724 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); 725 if (!fsc->inode_wq) 726 goto fail_client; 727 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 728 if (!fsc->cap_wq) 729 goto fail_inode_wq; 730 731 /* set up mempools */ 732 err = -ENOMEM; 733 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 734 size = sizeof (struct page *) * (page_count ? page_count : 1); 735 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 736 if (!fsc->wb_pagevec_pool) 737 goto fail_cap_wq; 738 739 return fsc; 740 741 fail_cap_wq: 742 destroy_workqueue(fsc->cap_wq); 743 fail_inode_wq: 744 destroy_workqueue(fsc->inode_wq); 745 fail_client: 746 ceph_destroy_client(fsc->client); 747 fail: 748 kfree(fsc); 749 if (opt) 750 ceph_destroy_options(opt); 751 destroy_mount_options(fsopt); 752 return ERR_PTR(err); 753 } 754 755 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 756 { 757 flush_workqueue(fsc->inode_wq); 758 flush_workqueue(fsc->cap_wq); 759 } 760 761 static void destroy_fs_client(struct ceph_fs_client *fsc) 762 { 763 dout("destroy_fs_client %p\n", fsc); 764 765 ceph_mdsc_destroy(fsc); 766 destroy_workqueue(fsc->inode_wq); 767 destroy_workqueue(fsc->cap_wq); 768 769 mempool_destroy(fsc->wb_pagevec_pool); 770 771 destroy_mount_options(fsc->mount_options); 772 773 ceph_destroy_client(fsc->client); 774 775 kfree(fsc); 776 dout("destroy_fs_client %p done\n", fsc); 777 } 778 779 /* 780 * caches 781 */ 782 struct kmem_cache *ceph_inode_cachep; 783 struct kmem_cache *ceph_cap_cachep; 784 struct kmem_cache *ceph_cap_flush_cachep; 785 struct kmem_cache *ceph_dentry_cachep; 786 struct kmem_cache *ceph_file_cachep; 787 struct kmem_cache *ceph_dir_file_cachep; 788 789 static void ceph_inode_init_once(void *foo) 790 { 791 struct ceph_inode_info *ci = foo; 792 inode_init_once(&ci->vfs_inode); 793 } 794 795 static int __init init_caches(void) 796 { 797 int error = -ENOMEM; 798 799 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 800 sizeof(struct ceph_inode_info), 801 __alignof__(struct ceph_inode_info), 802 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 803 SLAB_ACCOUNT, ceph_inode_init_once); 804 if (!ceph_inode_cachep) 805 return -ENOMEM; 806 807 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 808 if (!ceph_cap_cachep) 809 goto bad_cap; 810 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 811 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 812 if (!ceph_cap_flush_cachep) 813 goto bad_cap_flush; 814 815 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 816 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 817 if (!ceph_dentry_cachep) 818 goto bad_dentry; 819 820 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 821 if (!ceph_file_cachep) 822 goto bad_file; 823 824 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 825 if (!ceph_dir_file_cachep) 826 goto bad_dir_file; 827 828 error = ceph_fscache_register(); 829 if (error) 830 goto bad_fscache; 831 832 return 0; 833 834 bad_fscache: 835 kmem_cache_destroy(ceph_dir_file_cachep); 836 bad_dir_file: 837 kmem_cache_destroy(ceph_file_cachep); 838 bad_file: 839 kmem_cache_destroy(ceph_dentry_cachep); 840 bad_dentry: 841 kmem_cache_destroy(ceph_cap_flush_cachep); 842 bad_cap_flush: 843 kmem_cache_destroy(ceph_cap_cachep); 844 bad_cap: 845 kmem_cache_destroy(ceph_inode_cachep); 846 return error; 847 } 848 849 static void destroy_caches(void) 850 { 851 /* 852 * Make sure all delayed rcu free inodes are flushed before we 853 * destroy cache. 854 */ 855 rcu_barrier(); 856 857 kmem_cache_destroy(ceph_inode_cachep); 858 kmem_cache_destroy(ceph_cap_cachep); 859 kmem_cache_destroy(ceph_cap_flush_cachep); 860 kmem_cache_destroy(ceph_dentry_cachep); 861 kmem_cache_destroy(ceph_file_cachep); 862 kmem_cache_destroy(ceph_dir_file_cachep); 863 864 ceph_fscache_unregister(); 865 } 866 867 /* 868 * ceph_umount_begin - initiate forced umount. Tear down down the 869 * mount, skipping steps that may hang while waiting for server(s). 870 */ 871 static void ceph_umount_begin(struct super_block *sb) 872 { 873 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 874 875 dout("ceph_umount_begin - starting forced umount\n"); 876 if (!fsc) 877 return; 878 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 879 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 880 ceph_mdsc_force_umount(fsc->mdsc); 881 fsc->filp_gen++; // invalidate open files 882 } 883 884 static const struct super_operations ceph_super_ops = { 885 .alloc_inode = ceph_alloc_inode, 886 .free_inode = ceph_free_inode, 887 .write_inode = ceph_write_inode, 888 .drop_inode = generic_delete_inode, 889 .evict_inode = ceph_evict_inode, 890 .sync_fs = ceph_sync_fs, 891 .put_super = ceph_put_super, 892 .show_options = ceph_show_options, 893 .statfs = ceph_statfs, 894 .umount_begin = ceph_umount_begin, 895 }; 896 897 /* 898 * Bootstrap mount by opening the root directory. Note the mount 899 * @started time from caller, and time out if this takes too long. 900 */ 901 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 902 const char *path, 903 unsigned long started) 904 { 905 struct ceph_mds_client *mdsc = fsc->mdsc; 906 struct ceph_mds_request *req = NULL; 907 int err; 908 struct dentry *root; 909 910 /* open dir */ 911 dout("open_root_inode opening '%s'\n", path); 912 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 913 if (IS_ERR(req)) 914 return ERR_CAST(req); 915 req->r_path1 = kstrdup(path, GFP_NOFS); 916 if (!req->r_path1) { 917 root = ERR_PTR(-ENOMEM); 918 goto out; 919 } 920 921 req->r_ino1.ino = CEPH_INO_ROOT; 922 req->r_ino1.snap = CEPH_NOSNAP; 923 req->r_started = started; 924 req->r_timeout = fsc->client->options->mount_timeout; 925 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 926 req->r_num_caps = 2; 927 err = ceph_mdsc_do_request(mdsc, NULL, req); 928 if (err == 0) { 929 struct inode *inode = req->r_target_inode; 930 req->r_target_inode = NULL; 931 dout("open_root_inode success\n"); 932 root = d_make_root(inode); 933 if (!root) { 934 root = ERR_PTR(-ENOMEM); 935 goto out; 936 } 937 dout("open_root_inode success, root dentry is %p\n", root); 938 } else { 939 root = ERR_PTR(err); 940 } 941 out: 942 ceph_mdsc_put_request(req); 943 return root; 944 } 945 946 /* 947 * mount: join the ceph cluster, and open root directory. 948 */ 949 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, 950 struct fs_context *fc) 951 { 952 int err; 953 unsigned long started = jiffies; /* note the start time */ 954 struct dentry *root; 955 956 dout("mount start %p\n", fsc); 957 mutex_lock(&fsc->client->mount_mutex); 958 959 if (!fsc->sb->s_root) { 960 const char *path, *p; 961 err = __ceph_open_session(fsc->client, started); 962 if (err < 0) 963 goto out; 964 965 /* setup fscache */ 966 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 967 err = ceph_fscache_register_fs(fsc, fc); 968 if (err < 0) 969 goto out; 970 } 971 972 p = path_remove_extra_slash(fsc->mount_options->server_path); 973 if (IS_ERR(p)) { 974 err = PTR_ERR(p); 975 goto out; 976 } 977 /* if the server path is omitted or just consists of '/' */ 978 if (!p) 979 path = ""; 980 else 981 path = p; 982 dout("mount opening path '%s'\n", path); 983 984 ceph_fs_debugfs_init(fsc); 985 986 root = open_root_dentry(fsc, path, started); 987 kfree(p); 988 if (IS_ERR(root)) { 989 err = PTR_ERR(root); 990 goto out; 991 } 992 fsc->sb->s_root = dget(root); 993 } else { 994 root = dget(fsc->sb->s_root); 995 } 996 997 fsc->mount_state = CEPH_MOUNT_MOUNTED; 998 dout("mount success\n"); 999 mutex_unlock(&fsc->client->mount_mutex); 1000 return root; 1001 1002 out: 1003 mutex_unlock(&fsc->client->mount_mutex); 1004 return ERR_PTR(err); 1005 } 1006 1007 static int ceph_set_super(struct super_block *s, struct fs_context *fc) 1008 { 1009 struct ceph_fs_client *fsc = s->s_fs_info; 1010 int ret; 1011 1012 dout("set_super %p\n", s); 1013 1014 s->s_maxbytes = MAX_LFS_FILESIZE; 1015 1016 s->s_xattr = ceph_xattr_handlers; 1017 fsc->sb = s; 1018 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 1019 1020 s->s_op = &ceph_super_ops; 1021 s->s_d_op = &ceph_dentry_ops; 1022 s->s_export_op = &ceph_export_ops; 1023 1024 s->s_time_gran = 1; 1025 s->s_time_min = 0; 1026 s->s_time_max = U32_MAX; 1027 1028 ret = set_anon_super_fc(s, fc); 1029 if (ret != 0) 1030 fsc->sb = NULL; 1031 return ret; 1032 } 1033 1034 /* 1035 * share superblock if same fs AND options 1036 */ 1037 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) 1038 { 1039 struct ceph_fs_client *new = fc->s_fs_info; 1040 struct ceph_mount_options *fsopt = new->mount_options; 1041 struct ceph_options *opt = new->client->options; 1042 struct ceph_fs_client *other = ceph_sb_to_client(sb); 1043 1044 dout("ceph_compare_super %p\n", sb); 1045 1046 if (compare_mount_options(fsopt, opt, other)) { 1047 dout("monitor(s)/mount options don't match\n"); 1048 return 0; 1049 } 1050 if ((opt->flags & CEPH_OPT_FSID) && 1051 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 1052 dout("fsid doesn't match\n"); 1053 return 0; 1054 } 1055 if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { 1056 dout("flags differ\n"); 1057 return 0; 1058 } 1059 return 1; 1060 } 1061 1062 /* 1063 * construct our own bdi so we can control readahead, etc. 1064 */ 1065 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1066 1067 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1068 { 1069 int err; 1070 1071 err = super_setup_bdi_name(sb, "ceph-%ld", 1072 atomic_long_inc_return(&bdi_seq)); 1073 if (err) 1074 return err; 1075 1076 /* set ra_pages based on rasize mount option? */ 1077 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1078 1079 /* set io_pages based on max osd read size */ 1080 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1081 1082 return 0; 1083 } 1084 1085 static int ceph_get_tree(struct fs_context *fc) 1086 { 1087 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1088 struct super_block *sb; 1089 struct ceph_fs_client *fsc; 1090 struct dentry *res; 1091 int (*compare_super)(struct super_block *, struct fs_context *) = 1092 ceph_compare_super; 1093 int err; 1094 1095 dout("ceph_get_tree\n"); 1096 1097 if (!fc->source) 1098 return invalfc(fc, "No source"); 1099 1100 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1101 fc->sb_flags |= SB_POSIXACL; 1102 #endif 1103 1104 /* create client (which we may/may not use) */ 1105 fsc = create_fs_client(pctx->opts, pctx->copts); 1106 pctx->opts = NULL; 1107 pctx->copts = NULL; 1108 if (IS_ERR(fsc)) { 1109 err = PTR_ERR(fsc); 1110 goto out_final; 1111 } 1112 1113 err = ceph_mdsc_init(fsc); 1114 if (err < 0) 1115 goto out; 1116 1117 if (ceph_test_opt(fsc->client, NOSHARE)) 1118 compare_super = NULL; 1119 1120 fc->s_fs_info = fsc; 1121 sb = sget_fc(fc, compare_super, ceph_set_super); 1122 fc->s_fs_info = NULL; 1123 if (IS_ERR(sb)) { 1124 err = PTR_ERR(sb); 1125 goto out; 1126 } 1127 1128 if (ceph_sb_to_client(sb) != fsc) { 1129 destroy_fs_client(fsc); 1130 fsc = ceph_sb_to_client(sb); 1131 dout("get_sb got existing client %p\n", fsc); 1132 } else { 1133 dout("get_sb using new client %p\n", fsc); 1134 err = ceph_setup_bdi(sb, fsc); 1135 if (err < 0) 1136 goto out_splat; 1137 } 1138 1139 res = ceph_real_mount(fsc, fc); 1140 if (IS_ERR(res)) { 1141 err = PTR_ERR(res); 1142 goto out_splat; 1143 } 1144 dout("root %p inode %p ino %llx.%llx\n", res, 1145 d_inode(res), ceph_vinop(d_inode(res))); 1146 fc->root = fsc->sb->s_root; 1147 return 0; 1148 1149 out_splat: 1150 if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { 1151 pr_info("No mds server is up or the cluster is laggy\n"); 1152 err = -EHOSTUNREACH; 1153 } 1154 1155 ceph_mdsc_close_sessions(fsc->mdsc); 1156 deactivate_locked_super(sb); 1157 goto out_final; 1158 1159 out: 1160 destroy_fs_client(fsc); 1161 out_final: 1162 dout("ceph_get_tree fail %d\n", err); 1163 return err; 1164 } 1165 1166 static void ceph_free_fc(struct fs_context *fc) 1167 { 1168 struct ceph_parse_opts_ctx *pctx = fc->fs_private; 1169 1170 if (pctx) { 1171 destroy_mount_options(pctx->opts); 1172 ceph_destroy_options(pctx->copts); 1173 kfree(pctx); 1174 } 1175 } 1176 1177 static int ceph_reconfigure_fc(struct fs_context *fc) 1178 { 1179 sync_filesystem(fc->root->d_sb); 1180 return 0; 1181 } 1182 1183 static const struct fs_context_operations ceph_context_ops = { 1184 .free = ceph_free_fc, 1185 .parse_param = ceph_parse_mount_param, 1186 .get_tree = ceph_get_tree, 1187 .reconfigure = ceph_reconfigure_fc, 1188 }; 1189 1190 /* 1191 * Set up the filesystem mount context. 1192 */ 1193 static int ceph_init_fs_context(struct fs_context *fc) 1194 { 1195 struct ceph_parse_opts_ctx *pctx; 1196 struct ceph_mount_options *fsopt; 1197 1198 pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); 1199 if (!pctx) 1200 return -ENOMEM; 1201 1202 pctx->copts = ceph_alloc_options(); 1203 if (!pctx->copts) 1204 goto nomem; 1205 1206 pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); 1207 if (!pctx->opts) 1208 goto nomem; 1209 1210 fsopt = pctx->opts; 1211 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 1212 1213 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 1214 fsopt->rsize = CEPH_MAX_READ_SIZE; 1215 fsopt->rasize = CEPH_RASIZE_DEFAULT; 1216 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 1217 if (!fsopt->snapdir_name) 1218 goto nomem; 1219 1220 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 1221 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 1222 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 1223 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 1224 fsopt->congestion_kb = default_congestion_kb(); 1225 1226 fc->fs_private = pctx; 1227 fc->ops = &ceph_context_ops; 1228 return 0; 1229 1230 nomem: 1231 destroy_mount_options(pctx->opts); 1232 ceph_destroy_options(pctx->copts); 1233 kfree(pctx); 1234 return -ENOMEM; 1235 } 1236 1237 static void ceph_kill_sb(struct super_block *s) 1238 { 1239 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1240 dev_t dev = s->s_dev; 1241 1242 dout("kill_sb %p\n", s); 1243 1244 ceph_mdsc_pre_umount(fsc->mdsc); 1245 flush_fs_workqueues(fsc); 1246 1247 generic_shutdown_super(s); 1248 1249 fsc->client->extra_mon_dispatch = NULL; 1250 ceph_fs_debugfs_cleanup(fsc); 1251 1252 ceph_fscache_unregister_fs(fsc); 1253 1254 destroy_fs_client(fsc); 1255 free_anon_bdev(dev); 1256 } 1257 1258 static struct file_system_type ceph_fs_type = { 1259 .owner = THIS_MODULE, 1260 .name = "ceph", 1261 .init_fs_context = ceph_init_fs_context, 1262 .kill_sb = ceph_kill_sb, 1263 .fs_flags = FS_RENAME_DOES_D_MOVE, 1264 }; 1265 MODULE_ALIAS_FS("ceph"); 1266 1267 int ceph_force_reconnect(struct super_block *sb) 1268 { 1269 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 1270 int err = 0; 1271 1272 ceph_umount_begin(sb); 1273 1274 /* Make sure all page caches get invalidated. 1275 * see remove_session_caps_cb() */ 1276 flush_workqueue(fsc->inode_wq); 1277 1278 /* In case that we were blacklisted. This also reset 1279 * all mon/osd connections */ 1280 ceph_reset_client_addr(fsc->client); 1281 1282 ceph_osdc_clear_abort_err(&fsc->client->osdc); 1283 1284 fsc->blacklisted = false; 1285 fsc->mount_state = CEPH_MOUNT_MOUNTED; 1286 1287 if (sb->s_root) { 1288 err = __ceph_do_getattr(d_inode(sb->s_root), NULL, 1289 CEPH_STAT_CAP_INODE, true); 1290 } 1291 return err; 1292 } 1293 1294 static int __init init_ceph(void) 1295 { 1296 int ret = init_caches(); 1297 if (ret) 1298 goto out; 1299 1300 ceph_flock_init(); 1301 ret = register_filesystem(&ceph_fs_type); 1302 if (ret) 1303 goto out_caches; 1304 1305 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1306 1307 return 0; 1308 1309 out_caches: 1310 destroy_caches(); 1311 out: 1312 return ret; 1313 } 1314 1315 static void __exit exit_ceph(void) 1316 { 1317 dout("exit_ceph\n"); 1318 unregister_filesystem(&ceph_fs_type); 1319 destroy_caches(); 1320 } 1321 1322 module_init(init_ceph); 1323 module_exit(exit_ceph); 1324 1325 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1326 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1327 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1328 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1329 MODULE_LICENSE("GPL"); 1330