1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/ceph/ceph_debug.h> 4 5 #include <linux/backing-dev.h> 6 #include <linux/ctype.h> 7 #include <linux/fs.h> 8 #include <linux/inet.h> 9 #include <linux/in6.h> 10 #include <linux/module.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/sched.h> 14 #include <linux/seq_file.h> 15 #include <linux/slab.h> 16 #include <linux/statfs.h> 17 #include <linux/string.h> 18 19 #include "super.h" 20 #include "mds_client.h" 21 #include "cache.h" 22 23 #include <linux/ceph/ceph_features.h> 24 #include <linux/ceph/decode.h> 25 #include <linux/ceph/mon_client.h> 26 #include <linux/ceph/auth.h> 27 #include <linux/ceph/debugfs.h> 28 29 /* 30 * Ceph superblock operations 31 * 32 * Handle the basics of mounting, unmounting. 33 */ 34 35 /* 36 * super ops 37 */ 38 static void ceph_put_super(struct super_block *s) 39 { 40 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 41 42 dout("put_super\n"); 43 ceph_mdsc_close_sessions(fsc->mdsc); 44 } 45 46 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 47 { 48 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 49 struct ceph_mon_client *monc = &fsc->client->monc; 50 struct ceph_statfs st; 51 u64 fsid; 52 int err; 53 u64 data_pool; 54 55 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 56 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 57 } else { 58 data_pool = CEPH_NOPOOL; 59 } 60 61 dout("statfs\n"); 62 err = ceph_monc_do_statfs(monc, data_pool, &st); 63 if (err < 0) 64 return err; 65 66 /* fill in kstatfs */ 67 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 68 69 /* 70 * express utilization in terms of large blocks to avoid 71 * overflow on 32-bit machines. 72 * 73 * NOTE: for the time being, we make bsize == frsize to humor 74 * not-yet-ancient versions of glibc that are broken. 75 * Someday, we will probably want to report a real block 76 * size... whatever that may mean for a network file system! 77 */ 78 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 79 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 80 81 /* 82 * By default use root quota for stats; fallback to overall filesystem 83 * usage if using 'noquotadf' mount option or if the root dir doesn't 84 * have max_bytes quota set. 85 */ 86 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 87 !ceph_quota_update_statfs(fsc, buf)) { 88 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 89 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 90 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 91 } 92 93 buf->f_files = le64_to_cpu(st.num_objects); 94 buf->f_ffree = -1; 95 buf->f_namelen = NAME_MAX; 96 97 /* Must convert the fsid, for consistent values across arches */ 98 mutex_lock(&monc->mutex); 99 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 100 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 101 mutex_unlock(&monc->mutex); 102 103 buf->f_fsid.val[0] = fsid & 0xffffffff; 104 buf->f_fsid.val[1] = fsid >> 32; 105 106 return 0; 107 } 108 109 110 static int ceph_sync_fs(struct super_block *sb, int wait) 111 { 112 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 113 114 if (!wait) { 115 dout("sync_fs (non-blocking)\n"); 116 ceph_flush_dirty_caps(fsc->mdsc); 117 dout("sync_fs (non-blocking) done\n"); 118 return 0; 119 } 120 121 dout("sync_fs (blocking)\n"); 122 ceph_osdc_sync(&fsc->client->osdc); 123 ceph_mdsc_sync(fsc->mdsc); 124 dout("sync_fs (blocking) done\n"); 125 return 0; 126 } 127 128 /* 129 * mount options 130 */ 131 enum { 132 Opt_wsize, 133 Opt_rsize, 134 Opt_rasize, 135 Opt_caps_wanted_delay_min, 136 Opt_caps_wanted_delay_max, 137 Opt_caps_max, 138 Opt_readdir_max_entries, 139 Opt_readdir_max_bytes, 140 Opt_congestion_kb, 141 Opt_last_int, 142 /* int args above */ 143 Opt_snapdirname, 144 Opt_mds_namespace, 145 Opt_fscache_uniq, 146 Opt_last_string, 147 /* string args above */ 148 Opt_dirstat, 149 Opt_nodirstat, 150 Opt_rbytes, 151 Opt_norbytes, 152 Opt_asyncreaddir, 153 Opt_noasyncreaddir, 154 Opt_dcache, 155 Opt_nodcache, 156 Opt_ino32, 157 Opt_noino32, 158 Opt_fscache, 159 Opt_nofscache, 160 Opt_poolperm, 161 Opt_nopoolperm, 162 Opt_require_active_mds, 163 Opt_norequire_active_mds, 164 #ifdef CONFIG_CEPH_FS_POSIX_ACL 165 Opt_acl, 166 #endif 167 Opt_noacl, 168 Opt_quotadf, 169 Opt_noquotadf, 170 Opt_copyfrom, 171 Opt_nocopyfrom, 172 }; 173 174 static match_table_t fsopt_tokens = { 175 {Opt_wsize, "wsize=%d"}, 176 {Opt_rsize, "rsize=%d"}, 177 {Opt_rasize, "rasize=%d"}, 178 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 179 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 180 {Opt_caps_max, "caps_max=%d"}, 181 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 182 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 183 {Opt_congestion_kb, "write_congestion_kb=%d"}, 184 /* int args above */ 185 {Opt_snapdirname, "snapdirname=%s"}, 186 {Opt_mds_namespace, "mds_namespace=%s"}, 187 {Opt_fscache_uniq, "fsc=%s"}, 188 /* string args above */ 189 {Opt_dirstat, "dirstat"}, 190 {Opt_nodirstat, "nodirstat"}, 191 {Opt_rbytes, "rbytes"}, 192 {Opt_norbytes, "norbytes"}, 193 {Opt_asyncreaddir, "asyncreaddir"}, 194 {Opt_noasyncreaddir, "noasyncreaddir"}, 195 {Opt_dcache, "dcache"}, 196 {Opt_nodcache, "nodcache"}, 197 {Opt_ino32, "ino32"}, 198 {Opt_noino32, "noino32"}, 199 {Opt_fscache, "fsc"}, 200 {Opt_nofscache, "nofsc"}, 201 {Opt_poolperm, "poolperm"}, 202 {Opt_nopoolperm, "nopoolperm"}, 203 {Opt_require_active_mds, "require_active_mds"}, 204 {Opt_norequire_active_mds, "norequire_active_mds"}, 205 #ifdef CONFIG_CEPH_FS_POSIX_ACL 206 {Opt_acl, "acl"}, 207 #endif 208 {Opt_noacl, "noacl"}, 209 {Opt_quotadf, "quotadf"}, 210 {Opt_noquotadf, "noquotadf"}, 211 {Opt_copyfrom, "copyfrom"}, 212 {Opt_nocopyfrom, "nocopyfrom"}, 213 {-1, NULL} 214 }; 215 216 static int parse_fsopt_token(char *c, void *private) 217 { 218 struct ceph_mount_options *fsopt = private; 219 substring_t argstr[MAX_OPT_ARGS]; 220 int token, intval, ret; 221 222 token = match_token((char *)c, fsopt_tokens, argstr); 223 if (token < 0) 224 return -EINVAL; 225 226 if (token < Opt_last_int) { 227 ret = match_int(&argstr[0], &intval); 228 if (ret < 0) { 229 pr_err("bad option arg (not int) at '%s'\n", c); 230 return ret; 231 } 232 dout("got int token %d val %d\n", token, intval); 233 } else if (token > Opt_last_int && token < Opt_last_string) { 234 dout("got string token %d val %s\n", token, 235 argstr[0].from); 236 } else { 237 dout("got token %d\n", token); 238 } 239 240 switch (token) { 241 case Opt_snapdirname: 242 kfree(fsopt->snapdir_name); 243 fsopt->snapdir_name = kstrndup(argstr[0].from, 244 argstr[0].to-argstr[0].from, 245 GFP_KERNEL); 246 if (!fsopt->snapdir_name) 247 return -ENOMEM; 248 break; 249 case Opt_mds_namespace: 250 kfree(fsopt->mds_namespace); 251 fsopt->mds_namespace = kstrndup(argstr[0].from, 252 argstr[0].to-argstr[0].from, 253 GFP_KERNEL); 254 if (!fsopt->mds_namespace) 255 return -ENOMEM; 256 break; 257 case Opt_fscache_uniq: 258 kfree(fsopt->fscache_uniq); 259 fsopt->fscache_uniq = kstrndup(argstr[0].from, 260 argstr[0].to-argstr[0].from, 261 GFP_KERNEL); 262 if (!fsopt->fscache_uniq) 263 return -ENOMEM; 264 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 265 break; 266 /* misc */ 267 case Opt_wsize: 268 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 269 return -EINVAL; 270 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 271 break; 272 case Opt_rsize: 273 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 274 return -EINVAL; 275 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 276 break; 277 case Opt_rasize: 278 if (intval < 0) 279 return -EINVAL; 280 fsopt->rasize = ALIGN(intval, PAGE_SIZE); 281 break; 282 case Opt_caps_wanted_delay_min: 283 if (intval < 1) 284 return -EINVAL; 285 fsopt->caps_wanted_delay_min = intval; 286 break; 287 case Opt_caps_wanted_delay_max: 288 if (intval < 1) 289 return -EINVAL; 290 fsopt->caps_wanted_delay_max = intval; 291 break; 292 case Opt_caps_max: 293 if (intval < 0) 294 return -EINVAL; 295 fsopt->caps_max = intval; 296 break; 297 case Opt_readdir_max_entries: 298 if (intval < 1) 299 return -EINVAL; 300 fsopt->max_readdir = intval; 301 break; 302 case Opt_readdir_max_bytes: 303 if (intval < (int)PAGE_SIZE && intval != 0) 304 return -EINVAL; 305 fsopt->max_readdir_bytes = intval; 306 break; 307 case Opt_congestion_kb: 308 if (intval < 1024) /* at least 1M */ 309 return -EINVAL; 310 fsopt->congestion_kb = intval; 311 break; 312 case Opt_dirstat: 313 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 314 break; 315 case Opt_nodirstat: 316 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 317 break; 318 case Opt_rbytes: 319 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 320 break; 321 case Opt_norbytes: 322 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 323 break; 324 case Opt_asyncreaddir: 325 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 326 break; 327 case Opt_noasyncreaddir: 328 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 329 break; 330 case Opt_dcache: 331 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 332 break; 333 case Opt_nodcache: 334 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 335 break; 336 case Opt_ino32: 337 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 338 break; 339 case Opt_noino32: 340 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 341 break; 342 case Opt_fscache: 343 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 344 kfree(fsopt->fscache_uniq); 345 fsopt->fscache_uniq = NULL; 346 break; 347 case Opt_nofscache: 348 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 349 kfree(fsopt->fscache_uniq); 350 fsopt->fscache_uniq = NULL; 351 break; 352 case Opt_poolperm: 353 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 354 break; 355 case Opt_nopoolperm: 356 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 357 break; 358 case Opt_require_active_mds: 359 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 360 break; 361 case Opt_norequire_active_mds: 362 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 363 break; 364 case Opt_quotadf: 365 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 366 break; 367 case Opt_noquotadf: 368 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 369 break; 370 case Opt_copyfrom: 371 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 372 break; 373 case Opt_nocopyfrom: 374 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 375 break; 376 #ifdef CONFIG_CEPH_FS_POSIX_ACL 377 case Opt_acl: 378 fsopt->sb_flags |= SB_POSIXACL; 379 break; 380 #endif 381 case Opt_noacl: 382 fsopt->sb_flags &= ~SB_POSIXACL; 383 break; 384 default: 385 BUG_ON(token); 386 } 387 return 0; 388 } 389 390 static void destroy_mount_options(struct ceph_mount_options *args) 391 { 392 dout("destroy_mount_options %p\n", args); 393 kfree(args->snapdir_name); 394 kfree(args->mds_namespace); 395 kfree(args->server_path); 396 kfree(args->fscache_uniq); 397 kfree(args); 398 } 399 400 static int strcmp_null(const char *s1, const char *s2) 401 { 402 if (!s1 && !s2) 403 return 0; 404 if (s1 && !s2) 405 return -1; 406 if (!s1 && s2) 407 return 1; 408 return strcmp(s1, s2); 409 } 410 411 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 412 struct ceph_options *new_opt, 413 struct ceph_fs_client *fsc) 414 { 415 struct ceph_mount_options *fsopt1 = new_fsopt; 416 struct ceph_mount_options *fsopt2 = fsc->mount_options; 417 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 418 int ret; 419 420 ret = memcmp(fsopt1, fsopt2, ofs); 421 if (ret) 422 return ret; 423 424 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 425 if (ret) 426 return ret; 427 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 428 if (ret) 429 return ret; 430 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 431 if (ret) 432 return ret; 433 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 434 if (ret) 435 return ret; 436 437 return ceph_compare_options(new_opt, fsc->client); 438 } 439 440 static int parse_mount_options(struct ceph_mount_options **pfsopt, 441 struct ceph_options **popt, 442 int flags, char *options, 443 const char *dev_name) 444 { 445 struct ceph_mount_options *fsopt; 446 const char *dev_name_end; 447 int err; 448 449 if (!dev_name || !*dev_name) 450 return -EINVAL; 451 452 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 453 if (!fsopt) 454 return -ENOMEM; 455 456 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 457 458 fsopt->sb_flags = flags; 459 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 460 461 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 462 fsopt->rsize = CEPH_MAX_READ_SIZE; 463 fsopt->rasize = CEPH_RASIZE_DEFAULT; 464 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 465 if (!fsopt->snapdir_name) { 466 err = -ENOMEM; 467 goto out; 468 } 469 470 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 471 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 472 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 473 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 474 fsopt->congestion_kb = default_congestion_kb(); 475 476 /* 477 * Distinguish the server list from the path in "dev_name". 478 * Internally we do not include the leading '/' in the path. 479 * 480 * "dev_name" will look like: 481 * <server_spec>[,<server_spec>...]:[<path>] 482 * where 483 * <server_spec> is <ip>[:<port>] 484 * <path> is optional, but if present must begin with '/' 485 */ 486 dev_name_end = strchr(dev_name, '/'); 487 if (dev_name_end) { 488 if (strlen(dev_name_end) > 1) { 489 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 490 if (!fsopt->server_path) { 491 err = -ENOMEM; 492 goto out; 493 } 494 } 495 } else { 496 dev_name_end = dev_name + strlen(dev_name); 497 } 498 err = -EINVAL; 499 dev_name_end--; /* back up to ':' separator */ 500 if (dev_name_end < dev_name || *dev_name_end != ':') { 501 pr_err("device name is missing path (no : separator in %s)\n", 502 dev_name); 503 goto out; 504 } 505 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 506 if (fsopt->server_path) 507 dout("server path '%s'\n", fsopt->server_path); 508 509 *popt = ceph_parse_options(options, dev_name, dev_name_end, 510 parse_fsopt_token, (void *)fsopt); 511 if (IS_ERR(*popt)) { 512 err = PTR_ERR(*popt); 513 goto out; 514 } 515 516 /* success */ 517 *pfsopt = fsopt; 518 return 0; 519 520 out: 521 destroy_mount_options(fsopt); 522 return err; 523 } 524 525 /** 526 * ceph_show_options - Show mount options in /proc/mounts 527 * @m: seq_file to write to 528 * @root: root of that (sub)tree 529 */ 530 static int ceph_show_options(struct seq_file *m, struct dentry *root) 531 { 532 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 533 struct ceph_mount_options *fsopt = fsc->mount_options; 534 size_t pos; 535 int ret; 536 537 /* a comma between MNT/MS and client options */ 538 seq_putc(m, ','); 539 pos = m->count; 540 541 ret = ceph_print_client_options(m, fsc->client, false); 542 if (ret) 543 return ret; 544 545 /* retract our comma if no client options */ 546 if (m->count == pos) 547 m->count--; 548 549 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 550 seq_puts(m, ",dirstat"); 551 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 552 seq_puts(m, ",rbytes"); 553 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 554 seq_puts(m, ",noasyncreaddir"); 555 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 556 seq_puts(m, ",nodcache"); 557 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 558 seq_puts(m, ",ino32"); 559 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 560 seq_show_option(m, "fsc", fsopt->fscache_uniq); 561 } 562 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 563 seq_puts(m, ",nopoolperm"); 564 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 565 seq_puts(m, ",noquotadf"); 566 567 #ifdef CONFIG_CEPH_FS_POSIX_ACL 568 if (fsopt->sb_flags & SB_POSIXACL) 569 seq_puts(m, ",acl"); 570 else 571 seq_puts(m, ",noacl"); 572 #endif 573 574 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 575 seq_puts(m, ",copyfrom"); 576 577 if (fsopt->mds_namespace) 578 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 579 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 580 seq_printf(m, ",wsize=%d", fsopt->wsize); 581 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 582 seq_printf(m, ",rsize=%d", fsopt->rsize); 583 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 584 seq_printf(m, ",rasize=%d", fsopt->rasize); 585 if (fsopt->congestion_kb != default_congestion_kb()) 586 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 587 if (fsopt->caps_max) 588 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 589 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 590 seq_printf(m, ",caps_wanted_delay_min=%d", 591 fsopt->caps_wanted_delay_min); 592 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 593 seq_printf(m, ",caps_wanted_delay_max=%d", 594 fsopt->caps_wanted_delay_max); 595 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 596 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 597 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 598 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 599 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 600 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 601 602 return 0; 603 } 604 605 /* 606 * handle any mon messages the standard library doesn't understand. 607 * return error if we don't either. 608 */ 609 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 610 { 611 struct ceph_fs_client *fsc = client->private; 612 int type = le16_to_cpu(msg->hdr.type); 613 614 switch (type) { 615 case CEPH_MSG_MDS_MAP: 616 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 617 return 0; 618 case CEPH_MSG_FS_MAP_USER: 619 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 620 return 0; 621 default: 622 return -1; 623 } 624 } 625 626 /* 627 * create a new fs client 628 * 629 * Success or not, this function consumes @fsopt and @opt. 630 */ 631 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 632 struct ceph_options *opt) 633 { 634 struct ceph_fs_client *fsc; 635 int page_count; 636 size_t size; 637 int err; 638 639 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 640 if (!fsc) { 641 err = -ENOMEM; 642 goto fail; 643 } 644 645 fsc->client = ceph_create_client(opt, fsc); 646 if (IS_ERR(fsc->client)) { 647 err = PTR_ERR(fsc->client); 648 goto fail; 649 } 650 opt = NULL; /* fsc->client now owns this */ 651 652 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 653 ceph_set_opt(fsc->client, ABORT_ON_FULL); 654 655 if (!fsopt->mds_namespace) { 656 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 657 0, true); 658 } else { 659 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 660 0, false); 661 } 662 663 fsc->mount_options = fsopt; 664 665 fsc->sb = NULL; 666 fsc->mount_state = CEPH_MOUNT_MOUNTING; 667 668 atomic_long_set(&fsc->writeback_count, 0); 669 670 err = -ENOMEM; 671 /* 672 * The number of concurrent works can be high but they don't need 673 * to be processed in parallel, limit concurrency. 674 */ 675 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 676 if (!fsc->wb_wq) 677 goto fail_client; 678 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 679 if (!fsc->pg_inv_wq) 680 goto fail_wb_wq; 681 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 682 if (!fsc->trunc_wq) 683 goto fail_pg_inv_wq; 684 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 685 if (!fsc->cap_wq) 686 goto fail_trunc_wq; 687 688 /* set up mempools */ 689 err = -ENOMEM; 690 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 691 size = sizeof (struct page *) * (page_count ? page_count : 1); 692 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 693 if (!fsc->wb_pagevec_pool) 694 goto fail_cap_wq; 695 696 return fsc; 697 698 fail_cap_wq: 699 destroy_workqueue(fsc->cap_wq); 700 fail_trunc_wq: 701 destroy_workqueue(fsc->trunc_wq); 702 fail_pg_inv_wq: 703 destroy_workqueue(fsc->pg_inv_wq); 704 fail_wb_wq: 705 destroy_workqueue(fsc->wb_wq); 706 fail_client: 707 ceph_destroy_client(fsc->client); 708 fail: 709 kfree(fsc); 710 if (opt) 711 ceph_destroy_options(opt); 712 destroy_mount_options(fsopt); 713 return ERR_PTR(err); 714 } 715 716 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 717 { 718 flush_workqueue(fsc->wb_wq); 719 flush_workqueue(fsc->pg_inv_wq); 720 flush_workqueue(fsc->trunc_wq); 721 flush_workqueue(fsc->cap_wq); 722 } 723 724 static void destroy_fs_client(struct ceph_fs_client *fsc) 725 { 726 dout("destroy_fs_client %p\n", fsc); 727 728 destroy_workqueue(fsc->wb_wq); 729 destroy_workqueue(fsc->pg_inv_wq); 730 destroy_workqueue(fsc->trunc_wq); 731 destroy_workqueue(fsc->cap_wq); 732 733 mempool_destroy(fsc->wb_pagevec_pool); 734 735 destroy_mount_options(fsc->mount_options); 736 737 ceph_destroy_client(fsc->client); 738 739 kfree(fsc); 740 dout("destroy_fs_client %p done\n", fsc); 741 } 742 743 /* 744 * caches 745 */ 746 struct kmem_cache *ceph_inode_cachep; 747 struct kmem_cache *ceph_cap_cachep; 748 struct kmem_cache *ceph_cap_flush_cachep; 749 struct kmem_cache *ceph_dentry_cachep; 750 struct kmem_cache *ceph_file_cachep; 751 struct kmem_cache *ceph_dir_file_cachep; 752 753 static void ceph_inode_init_once(void *foo) 754 { 755 struct ceph_inode_info *ci = foo; 756 inode_init_once(&ci->vfs_inode); 757 } 758 759 static int __init init_caches(void) 760 { 761 int error = -ENOMEM; 762 763 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 764 sizeof(struct ceph_inode_info), 765 __alignof__(struct ceph_inode_info), 766 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 767 SLAB_ACCOUNT, ceph_inode_init_once); 768 if (!ceph_inode_cachep) 769 return -ENOMEM; 770 771 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 772 if (!ceph_cap_cachep) 773 goto bad_cap; 774 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 775 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 776 if (!ceph_cap_flush_cachep) 777 goto bad_cap_flush; 778 779 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 780 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 781 if (!ceph_dentry_cachep) 782 goto bad_dentry; 783 784 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 785 if (!ceph_file_cachep) 786 goto bad_file; 787 788 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 789 if (!ceph_dir_file_cachep) 790 goto bad_dir_file; 791 792 error = ceph_fscache_register(); 793 if (error) 794 goto bad_fscache; 795 796 return 0; 797 798 bad_fscache: 799 kmem_cache_destroy(ceph_dir_file_cachep); 800 bad_dir_file: 801 kmem_cache_destroy(ceph_file_cachep); 802 bad_file: 803 kmem_cache_destroy(ceph_dentry_cachep); 804 bad_dentry: 805 kmem_cache_destroy(ceph_cap_flush_cachep); 806 bad_cap_flush: 807 kmem_cache_destroy(ceph_cap_cachep); 808 bad_cap: 809 kmem_cache_destroy(ceph_inode_cachep); 810 return error; 811 } 812 813 static void destroy_caches(void) 814 { 815 /* 816 * Make sure all delayed rcu free inodes are flushed before we 817 * destroy cache. 818 */ 819 rcu_barrier(); 820 821 kmem_cache_destroy(ceph_inode_cachep); 822 kmem_cache_destroy(ceph_cap_cachep); 823 kmem_cache_destroy(ceph_cap_flush_cachep); 824 kmem_cache_destroy(ceph_dentry_cachep); 825 kmem_cache_destroy(ceph_file_cachep); 826 kmem_cache_destroy(ceph_dir_file_cachep); 827 828 ceph_fscache_unregister(); 829 } 830 831 832 /* 833 * ceph_umount_begin - initiate forced umount. Tear down down the 834 * mount, skipping steps that may hang while waiting for server(s). 835 */ 836 static void ceph_umount_begin(struct super_block *sb) 837 { 838 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 839 840 dout("ceph_umount_begin - starting forced umount\n"); 841 if (!fsc) 842 return; 843 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 844 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 845 ceph_mdsc_force_umount(fsc->mdsc); 846 return; 847 } 848 849 static int ceph_remount(struct super_block *sb, int *flags, char *data) 850 { 851 sync_filesystem(sb); 852 return 0; 853 } 854 855 static const struct super_operations ceph_super_ops = { 856 .alloc_inode = ceph_alloc_inode, 857 .destroy_inode = ceph_destroy_inode, 858 .free_inode = ceph_free_inode, 859 .write_inode = ceph_write_inode, 860 .drop_inode = ceph_drop_inode, 861 .sync_fs = ceph_sync_fs, 862 .put_super = ceph_put_super, 863 .remount_fs = ceph_remount, 864 .show_options = ceph_show_options, 865 .statfs = ceph_statfs, 866 .umount_begin = ceph_umount_begin, 867 }; 868 869 /* 870 * Bootstrap mount by opening the root directory. Note the mount 871 * @started time from caller, and time out if this takes too long. 872 */ 873 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 874 const char *path, 875 unsigned long started) 876 { 877 struct ceph_mds_client *mdsc = fsc->mdsc; 878 struct ceph_mds_request *req = NULL; 879 int err; 880 struct dentry *root; 881 882 /* open dir */ 883 dout("open_root_inode opening '%s'\n", path); 884 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 885 if (IS_ERR(req)) 886 return ERR_CAST(req); 887 req->r_path1 = kstrdup(path, GFP_NOFS); 888 if (!req->r_path1) { 889 root = ERR_PTR(-ENOMEM); 890 goto out; 891 } 892 893 req->r_ino1.ino = CEPH_INO_ROOT; 894 req->r_ino1.snap = CEPH_NOSNAP; 895 req->r_started = started; 896 req->r_timeout = fsc->client->options->mount_timeout; 897 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 898 req->r_num_caps = 2; 899 err = ceph_mdsc_do_request(mdsc, NULL, req); 900 if (err == 0) { 901 struct inode *inode = req->r_target_inode; 902 req->r_target_inode = NULL; 903 dout("open_root_inode success\n"); 904 root = d_make_root(inode); 905 if (!root) { 906 root = ERR_PTR(-ENOMEM); 907 goto out; 908 } 909 dout("open_root_inode success, root dentry is %p\n", root); 910 } else { 911 root = ERR_PTR(err); 912 } 913 out: 914 ceph_mdsc_put_request(req); 915 return root; 916 } 917 918 919 920 921 /* 922 * mount: join the ceph cluster, and open root directory. 923 */ 924 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 925 { 926 int err; 927 unsigned long started = jiffies; /* note the start time */ 928 struct dentry *root; 929 930 dout("mount start %p\n", fsc); 931 mutex_lock(&fsc->client->mount_mutex); 932 933 if (!fsc->sb->s_root) { 934 const char *path; 935 err = __ceph_open_session(fsc->client, started); 936 if (err < 0) 937 goto out; 938 939 /* setup fscache */ 940 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 941 err = ceph_fscache_register_fs(fsc); 942 if (err < 0) 943 goto out; 944 } 945 946 if (!fsc->mount_options->server_path) { 947 path = ""; 948 dout("mount opening path \\t\n"); 949 } else { 950 path = fsc->mount_options->server_path + 1; 951 dout("mount opening path %s\n", path); 952 } 953 954 err = ceph_fs_debugfs_init(fsc); 955 if (err < 0) 956 goto out; 957 958 root = open_root_dentry(fsc, path, started); 959 if (IS_ERR(root)) { 960 err = PTR_ERR(root); 961 goto out; 962 } 963 fsc->sb->s_root = dget(root); 964 } else { 965 root = dget(fsc->sb->s_root); 966 } 967 968 fsc->mount_state = CEPH_MOUNT_MOUNTED; 969 dout("mount success\n"); 970 mutex_unlock(&fsc->client->mount_mutex); 971 return root; 972 973 out: 974 mutex_unlock(&fsc->client->mount_mutex); 975 return ERR_PTR(err); 976 } 977 978 static int ceph_set_super(struct super_block *s, void *data) 979 { 980 struct ceph_fs_client *fsc = data; 981 int ret; 982 983 dout("set_super %p data %p\n", s, data); 984 985 s->s_flags = fsc->mount_options->sb_flags; 986 s->s_maxbytes = MAX_LFS_FILESIZE; 987 988 s->s_xattr = ceph_xattr_handlers; 989 s->s_fs_info = fsc; 990 fsc->sb = s; 991 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 992 993 s->s_op = &ceph_super_ops; 994 s->s_d_op = &ceph_dentry_ops; 995 s->s_export_op = &ceph_export_ops; 996 997 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 998 999 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 1000 if (ret != 0) 1001 goto fail; 1002 1003 return ret; 1004 1005 fail: 1006 s->s_fs_info = NULL; 1007 fsc->sb = NULL; 1008 return ret; 1009 } 1010 1011 /* 1012 * share superblock if same fs AND options 1013 */ 1014 static int ceph_compare_super(struct super_block *sb, void *data) 1015 { 1016 struct ceph_fs_client *new = data; 1017 struct ceph_mount_options *fsopt = new->mount_options; 1018 struct ceph_options *opt = new->client->options; 1019 struct ceph_fs_client *other = ceph_sb_to_client(sb); 1020 1021 dout("ceph_compare_super %p\n", sb); 1022 1023 if (compare_mount_options(fsopt, opt, other)) { 1024 dout("monitor(s)/mount options don't match\n"); 1025 return 0; 1026 } 1027 if ((opt->flags & CEPH_OPT_FSID) && 1028 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 1029 dout("fsid doesn't match\n"); 1030 return 0; 1031 } 1032 if (fsopt->sb_flags != other->mount_options->sb_flags) { 1033 dout("flags differ\n"); 1034 return 0; 1035 } 1036 return 1; 1037 } 1038 1039 /* 1040 * construct our own bdi so we can control readahead, etc. 1041 */ 1042 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1043 1044 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1045 { 1046 int err; 1047 1048 err = super_setup_bdi_name(sb, "ceph-%ld", 1049 atomic_long_inc_return(&bdi_seq)); 1050 if (err) 1051 return err; 1052 1053 /* set ra_pages based on rasize mount option? */ 1054 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1055 1056 /* set io_pages based on max osd read size */ 1057 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1058 1059 return 0; 1060 } 1061 1062 static struct dentry *ceph_mount(struct file_system_type *fs_type, 1063 int flags, const char *dev_name, void *data) 1064 { 1065 struct super_block *sb; 1066 struct ceph_fs_client *fsc; 1067 struct dentry *res; 1068 int err; 1069 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 1070 struct ceph_mount_options *fsopt = NULL; 1071 struct ceph_options *opt = NULL; 1072 1073 dout("ceph_mount\n"); 1074 1075 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1076 flags |= SB_POSIXACL; 1077 #endif 1078 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 1079 if (err < 0) { 1080 res = ERR_PTR(err); 1081 goto out_final; 1082 } 1083 1084 /* create client (which we may/may not use) */ 1085 fsc = create_fs_client(fsopt, opt); 1086 if (IS_ERR(fsc)) { 1087 res = ERR_CAST(fsc); 1088 goto out_final; 1089 } 1090 1091 err = ceph_mdsc_init(fsc); 1092 if (err < 0) { 1093 res = ERR_PTR(err); 1094 goto out; 1095 } 1096 1097 if (ceph_test_opt(fsc->client, NOSHARE)) 1098 compare_super = NULL; 1099 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1100 if (IS_ERR(sb)) { 1101 res = ERR_CAST(sb); 1102 goto out; 1103 } 1104 1105 if (ceph_sb_to_client(sb) != fsc) { 1106 ceph_mdsc_destroy(fsc); 1107 destroy_fs_client(fsc); 1108 fsc = ceph_sb_to_client(sb); 1109 dout("get_sb got existing client %p\n", fsc); 1110 } else { 1111 dout("get_sb using new client %p\n", fsc); 1112 err = ceph_setup_bdi(sb, fsc); 1113 if (err < 0) { 1114 res = ERR_PTR(err); 1115 goto out_splat; 1116 } 1117 } 1118 1119 res = ceph_real_mount(fsc); 1120 if (IS_ERR(res)) 1121 goto out_splat; 1122 dout("root %p inode %p ino %llx.%llx\n", res, 1123 d_inode(res), ceph_vinop(d_inode(res))); 1124 return res; 1125 1126 out_splat: 1127 ceph_mdsc_close_sessions(fsc->mdsc); 1128 deactivate_locked_super(sb); 1129 goto out_final; 1130 1131 out: 1132 ceph_mdsc_destroy(fsc); 1133 destroy_fs_client(fsc); 1134 out_final: 1135 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1136 return res; 1137 } 1138 1139 static void ceph_kill_sb(struct super_block *s) 1140 { 1141 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1142 dev_t dev = s->s_dev; 1143 1144 dout("kill_sb %p\n", s); 1145 1146 ceph_mdsc_pre_umount(fsc->mdsc); 1147 flush_fs_workqueues(fsc); 1148 1149 generic_shutdown_super(s); 1150 1151 fsc->client->extra_mon_dispatch = NULL; 1152 ceph_fs_debugfs_cleanup(fsc); 1153 1154 ceph_fscache_unregister_fs(fsc); 1155 1156 ceph_mdsc_destroy(fsc); 1157 1158 destroy_fs_client(fsc); 1159 free_anon_bdev(dev); 1160 } 1161 1162 static struct file_system_type ceph_fs_type = { 1163 .owner = THIS_MODULE, 1164 .name = "ceph", 1165 .mount = ceph_mount, 1166 .kill_sb = ceph_kill_sb, 1167 .fs_flags = FS_RENAME_DOES_D_MOVE, 1168 }; 1169 MODULE_ALIAS_FS("ceph"); 1170 1171 static int __init init_ceph(void) 1172 { 1173 int ret = init_caches(); 1174 if (ret) 1175 goto out; 1176 1177 ceph_flock_init(); 1178 ceph_xattr_init(); 1179 ret = register_filesystem(&ceph_fs_type); 1180 if (ret) 1181 goto out_xattr; 1182 1183 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1184 1185 return 0; 1186 1187 out_xattr: 1188 ceph_xattr_exit(); 1189 destroy_caches(); 1190 out: 1191 return ret; 1192 } 1193 1194 static void __exit exit_ceph(void) 1195 { 1196 dout("exit_ceph\n"); 1197 unregister_filesystem(&ceph_fs_type); 1198 ceph_xattr_exit(); 1199 destroy_caches(); 1200 } 1201 1202 module_init(init_ceph); 1203 module_exit(exit_ceph); 1204 1205 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1206 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1207 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1208 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1209 MODULE_LICENSE("GPL"); 1210