1 2 #include <linux/ceph/ceph_debug.h> 3 4 #include <linux/backing-dev.h> 5 #include <linux/ctype.h> 6 #include <linux/fs.h> 7 #include <linux/inet.h> 8 #include <linux/in6.h> 9 #include <linux/module.h> 10 #include <linux/mount.h> 11 #include <linux/parser.h> 12 #include <linux/sched.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/statfs.h> 16 #include <linux/string.h> 17 18 #include "super.h" 19 #include "mds_client.h" 20 #include "cache.h" 21 22 #include <linux/ceph/ceph_features.h> 23 #include <linux/ceph/decode.h> 24 #include <linux/ceph/mon_client.h> 25 #include <linux/ceph/auth.h> 26 #include <linux/ceph/debugfs.h> 27 28 /* 29 * Ceph superblock operations 30 * 31 * Handle the basics of mounting, unmounting. 32 */ 33 34 /* 35 * super ops 36 */ 37 static void ceph_put_super(struct super_block *s) 38 { 39 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 40 41 dout("put_super\n"); 42 ceph_mdsc_close_sessions(fsc->mdsc); 43 } 44 45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 46 { 47 struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); 48 struct ceph_mon_client *monc = &fsc->client->monc; 49 struct ceph_statfs st; 50 u64 fsid; 51 int err; 52 u64 data_pool; 53 54 if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { 55 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; 56 } else { 57 data_pool = CEPH_NOPOOL; 58 } 59 60 dout("statfs\n"); 61 err = ceph_monc_do_statfs(monc, data_pool, &st); 62 if (err < 0) 63 return err; 64 65 /* fill in kstatfs */ 66 buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ 67 68 /* 69 * express utilization in terms of large blocks to avoid 70 * overflow on 32-bit machines. 71 * 72 * NOTE: for the time being, we make bsize == frsize to humor 73 * not-yet-ancient versions of glibc that are broken. 74 * Someday, we will probably want to report a real block 75 * size... whatever that may mean for a network file system! 76 */ 77 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 78 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; 79 80 /* 81 * By default use root quota for stats; fallback to overall filesystem 82 * usage if using 'noquotadf' mount option or if the root dir doesn't 83 * have max_bytes quota set. 84 */ 85 if (ceph_test_mount_opt(fsc, NOQUOTADF) || 86 !ceph_quota_update_statfs(fsc, buf)) { 87 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 88 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 89 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 90 } 91 92 buf->f_files = le64_to_cpu(st.num_objects); 93 buf->f_ffree = -1; 94 buf->f_namelen = NAME_MAX; 95 96 /* Must convert the fsid, for consistent values across arches */ 97 mutex_lock(&monc->mutex); 98 fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ 99 le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); 100 mutex_unlock(&monc->mutex); 101 102 buf->f_fsid.val[0] = fsid & 0xffffffff; 103 buf->f_fsid.val[1] = fsid >> 32; 104 105 return 0; 106 } 107 108 109 static int ceph_sync_fs(struct super_block *sb, int wait) 110 { 111 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 112 113 if (!wait) { 114 dout("sync_fs (non-blocking)\n"); 115 ceph_flush_dirty_caps(fsc->mdsc); 116 dout("sync_fs (non-blocking) done\n"); 117 return 0; 118 } 119 120 dout("sync_fs (blocking)\n"); 121 ceph_osdc_sync(&fsc->client->osdc); 122 ceph_mdsc_sync(fsc->mdsc); 123 dout("sync_fs (blocking) done\n"); 124 return 0; 125 } 126 127 /* 128 * mount options 129 */ 130 enum { 131 Opt_wsize, 132 Opt_rsize, 133 Opt_rasize, 134 Opt_caps_wanted_delay_min, 135 Opt_caps_wanted_delay_max, 136 Opt_caps_max, 137 Opt_readdir_max_entries, 138 Opt_readdir_max_bytes, 139 Opt_congestion_kb, 140 Opt_last_int, 141 /* int args above */ 142 Opt_snapdirname, 143 Opt_mds_namespace, 144 Opt_fscache_uniq, 145 Opt_last_string, 146 /* string args above */ 147 Opt_dirstat, 148 Opt_nodirstat, 149 Opt_rbytes, 150 Opt_norbytes, 151 Opt_asyncreaddir, 152 Opt_noasyncreaddir, 153 Opt_dcache, 154 Opt_nodcache, 155 Opt_ino32, 156 Opt_noino32, 157 Opt_fscache, 158 Opt_nofscache, 159 Opt_poolperm, 160 Opt_nopoolperm, 161 Opt_require_active_mds, 162 Opt_norequire_active_mds, 163 #ifdef CONFIG_CEPH_FS_POSIX_ACL 164 Opt_acl, 165 #endif 166 Opt_noacl, 167 Opt_quotadf, 168 Opt_noquotadf, 169 Opt_copyfrom, 170 Opt_nocopyfrom, 171 }; 172 173 static match_table_t fsopt_tokens = { 174 {Opt_wsize, "wsize=%d"}, 175 {Opt_rsize, "rsize=%d"}, 176 {Opt_rasize, "rasize=%d"}, 177 {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, 178 {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, 179 {Opt_caps_max, "caps_max=%d"}, 180 {Opt_readdir_max_entries, "readdir_max_entries=%d"}, 181 {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, 182 {Opt_congestion_kb, "write_congestion_kb=%d"}, 183 /* int args above */ 184 {Opt_snapdirname, "snapdirname=%s"}, 185 {Opt_mds_namespace, "mds_namespace=%s"}, 186 {Opt_fscache_uniq, "fsc=%s"}, 187 /* string args above */ 188 {Opt_dirstat, "dirstat"}, 189 {Opt_nodirstat, "nodirstat"}, 190 {Opt_rbytes, "rbytes"}, 191 {Opt_norbytes, "norbytes"}, 192 {Opt_asyncreaddir, "asyncreaddir"}, 193 {Opt_noasyncreaddir, "noasyncreaddir"}, 194 {Opt_dcache, "dcache"}, 195 {Opt_nodcache, "nodcache"}, 196 {Opt_ino32, "ino32"}, 197 {Opt_noino32, "noino32"}, 198 {Opt_fscache, "fsc"}, 199 {Opt_nofscache, "nofsc"}, 200 {Opt_poolperm, "poolperm"}, 201 {Opt_nopoolperm, "nopoolperm"}, 202 {Opt_require_active_mds, "require_active_mds"}, 203 {Opt_norequire_active_mds, "norequire_active_mds"}, 204 #ifdef CONFIG_CEPH_FS_POSIX_ACL 205 {Opt_acl, "acl"}, 206 #endif 207 {Opt_noacl, "noacl"}, 208 {Opt_quotadf, "quotadf"}, 209 {Opt_noquotadf, "noquotadf"}, 210 {Opt_copyfrom, "copyfrom"}, 211 {Opt_nocopyfrom, "nocopyfrom"}, 212 {-1, NULL} 213 }; 214 215 static int parse_fsopt_token(char *c, void *private) 216 { 217 struct ceph_mount_options *fsopt = private; 218 substring_t argstr[MAX_OPT_ARGS]; 219 int token, intval, ret; 220 221 token = match_token((char *)c, fsopt_tokens, argstr); 222 if (token < 0) 223 return -EINVAL; 224 225 if (token < Opt_last_int) { 226 ret = match_int(&argstr[0], &intval); 227 if (ret < 0) { 228 pr_err("bad option arg (not int) at '%s'\n", c); 229 return ret; 230 } 231 dout("got int token %d val %d\n", token, intval); 232 } else if (token > Opt_last_int && token < Opt_last_string) { 233 dout("got string token %d val %s\n", token, 234 argstr[0].from); 235 } else { 236 dout("got token %d\n", token); 237 } 238 239 switch (token) { 240 case Opt_snapdirname: 241 kfree(fsopt->snapdir_name); 242 fsopt->snapdir_name = kstrndup(argstr[0].from, 243 argstr[0].to-argstr[0].from, 244 GFP_KERNEL); 245 if (!fsopt->snapdir_name) 246 return -ENOMEM; 247 break; 248 case Opt_mds_namespace: 249 kfree(fsopt->mds_namespace); 250 fsopt->mds_namespace = kstrndup(argstr[0].from, 251 argstr[0].to-argstr[0].from, 252 GFP_KERNEL); 253 if (!fsopt->mds_namespace) 254 return -ENOMEM; 255 break; 256 case Opt_fscache_uniq: 257 kfree(fsopt->fscache_uniq); 258 fsopt->fscache_uniq = kstrndup(argstr[0].from, 259 argstr[0].to-argstr[0].from, 260 GFP_KERNEL); 261 if (!fsopt->fscache_uniq) 262 return -ENOMEM; 263 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 264 break; 265 /* misc */ 266 case Opt_wsize: 267 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) 268 return -EINVAL; 269 fsopt->wsize = ALIGN(intval, PAGE_SIZE); 270 break; 271 case Opt_rsize: 272 if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) 273 return -EINVAL; 274 fsopt->rsize = ALIGN(intval, PAGE_SIZE); 275 break; 276 case Opt_rasize: 277 if (intval < 0) 278 return -EINVAL; 279 fsopt->rasize = ALIGN(intval, PAGE_SIZE); 280 break; 281 case Opt_caps_wanted_delay_min: 282 if (intval < 1) 283 return -EINVAL; 284 fsopt->caps_wanted_delay_min = intval; 285 break; 286 case Opt_caps_wanted_delay_max: 287 if (intval < 1) 288 return -EINVAL; 289 fsopt->caps_wanted_delay_max = intval; 290 break; 291 case Opt_caps_max: 292 if (intval < 0) 293 return -EINVAL; 294 fsopt->caps_max = intval; 295 break; 296 case Opt_readdir_max_entries: 297 if (intval < 1) 298 return -EINVAL; 299 fsopt->max_readdir = intval; 300 break; 301 case Opt_readdir_max_bytes: 302 if (intval < (int)PAGE_SIZE && intval != 0) 303 return -EINVAL; 304 fsopt->max_readdir_bytes = intval; 305 break; 306 case Opt_congestion_kb: 307 if (intval < 1024) /* at least 1M */ 308 return -EINVAL; 309 fsopt->congestion_kb = intval; 310 break; 311 case Opt_dirstat: 312 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; 313 break; 314 case Opt_nodirstat: 315 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; 316 break; 317 case Opt_rbytes: 318 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; 319 break; 320 case Opt_norbytes: 321 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; 322 break; 323 case Opt_asyncreaddir: 324 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; 325 break; 326 case Opt_noasyncreaddir: 327 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 328 break; 329 case Opt_dcache: 330 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; 331 break; 332 case Opt_nodcache: 333 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; 334 break; 335 case Opt_ino32: 336 fsopt->flags |= CEPH_MOUNT_OPT_INO32; 337 break; 338 case Opt_noino32: 339 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; 340 break; 341 case Opt_fscache: 342 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; 343 kfree(fsopt->fscache_uniq); 344 fsopt->fscache_uniq = NULL; 345 break; 346 case Opt_nofscache: 347 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; 348 kfree(fsopt->fscache_uniq); 349 fsopt->fscache_uniq = NULL; 350 break; 351 case Opt_poolperm: 352 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; 353 break; 354 case Opt_nopoolperm: 355 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; 356 break; 357 case Opt_require_active_mds: 358 fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; 359 break; 360 case Opt_norequire_active_mds: 361 fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; 362 break; 363 case Opt_quotadf: 364 fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; 365 break; 366 case Opt_noquotadf: 367 fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; 368 break; 369 case Opt_copyfrom: 370 fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; 371 break; 372 case Opt_nocopyfrom: 373 fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; 374 break; 375 #ifdef CONFIG_CEPH_FS_POSIX_ACL 376 case Opt_acl: 377 fsopt->sb_flags |= SB_POSIXACL; 378 break; 379 #endif 380 case Opt_noacl: 381 fsopt->sb_flags &= ~SB_POSIXACL; 382 break; 383 default: 384 BUG_ON(token); 385 } 386 return 0; 387 } 388 389 static void destroy_mount_options(struct ceph_mount_options *args) 390 { 391 dout("destroy_mount_options %p\n", args); 392 kfree(args->snapdir_name); 393 kfree(args->mds_namespace); 394 kfree(args->server_path); 395 kfree(args->fscache_uniq); 396 kfree(args); 397 } 398 399 static int strcmp_null(const char *s1, const char *s2) 400 { 401 if (!s1 && !s2) 402 return 0; 403 if (s1 && !s2) 404 return -1; 405 if (!s1 && s2) 406 return 1; 407 return strcmp(s1, s2); 408 } 409 410 static int compare_mount_options(struct ceph_mount_options *new_fsopt, 411 struct ceph_options *new_opt, 412 struct ceph_fs_client *fsc) 413 { 414 struct ceph_mount_options *fsopt1 = new_fsopt; 415 struct ceph_mount_options *fsopt2 = fsc->mount_options; 416 int ofs = offsetof(struct ceph_mount_options, snapdir_name); 417 int ret; 418 419 ret = memcmp(fsopt1, fsopt2, ofs); 420 if (ret) 421 return ret; 422 423 ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); 424 if (ret) 425 return ret; 426 ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); 427 if (ret) 428 return ret; 429 ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); 430 if (ret) 431 return ret; 432 ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); 433 if (ret) 434 return ret; 435 436 return ceph_compare_options(new_opt, fsc->client); 437 } 438 439 static int parse_mount_options(struct ceph_mount_options **pfsopt, 440 struct ceph_options **popt, 441 int flags, char *options, 442 const char *dev_name) 443 { 444 struct ceph_mount_options *fsopt; 445 const char *dev_name_end; 446 int err; 447 448 if (!dev_name || !*dev_name) 449 return -EINVAL; 450 451 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 452 if (!fsopt) 453 return -ENOMEM; 454 455 dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); 456 457 fsopt->sb_flags = flags; 458 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 459 460 fsopt->wsize = CEPH_MAX_WRITE_SIZE; 461 fsopt->rsize = CEPH_MAX_READ_SIZE; 462 fsopt->rasize = CEPH_RASIZE_DEFAULT; 463 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 464 if (!fsopt->snapdir_name) { 465 err = -ENOMEM; 466 goto out; 467 } 468 469 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 470 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 471 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 472 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 473 fsopt->congestion_kb = default_congestion_kb(); 474 475 /* 476 * Distinguish the server list from the path in "dev_name". 477 * Internally we do not include the leading '/' in the path. 478 * 479 * "dev_name" will look like: 480 * <server_spec>[,<server_spec>...]:[<path>] 481 * where 482 * <server_spec> is <ip>[:<port>] 483 * <path> is optional, but if present must begin with '/' 484 */ 485 dev_name_end = strchr(dev_name, '/'); 486 if (dev_name_end) { 487 if (strlen(dev_name_end) > 1) { 488 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); 489 if (!fsopt->server_path) { 490 err = -ENOMEM; 491 goto out; 492 } 493 } 494 } else { 495 dev_name_end = dev_name + strlen(dev_name); 496 } 497 err = -EINVAL; 498 dev_name_end--; /* back up to ':' separator */ 499 if (dev_name_end < dev_name || *dev_name_end != ':') { 500 pr_err("device name is missing path (no : separator in %s)\n", 501 dev_name); 502 goto out; 503 } 504 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 505 if (fsopt->server_path) 506 dout("server path '%s'\n", fsopt->server_path); 507 508 *popt = ceph_parse_options(options, dev_name, dev_name_end, 509 parse_fsopt_token, (void *)fsopt); 510 if (IS_ERR(*popt)) { 511 err = PTR_ERR(*popt); 512 goto out; 513 } 514 515 /* success */ 516 *pfsopt = fsopt; 517 return 0; 518 519 out: 520 destroy_mount_options(fsopt); 521 return err; 522 } 523 524 /** 525 * ceph_show_options - Show mount options in /proc/mounts 526 * @m: seq_file to write to 527 * @root: root of that (sub)tree 528 */ 529 static int ceph_show_options(struct seq_file *m, struct dentry *root) 530 { 531 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); 532 struct ceph_mount_options *fsopt = fsc->mount_options; 533 size_t pos; 534 int ret; 535 536 /* a comma between MNT/MS and client options */ 537 seq_putc(m, ','); 538 pos = m->count; 539 540 ret = ceph_print_client_options(m, fsc->client, false); 541 if (ret) 542 return ret; 543 544 /* retract our comma if no client options */ 545 if (m->count == pos) 546 m->count--; 547 548 if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) 549 seq_puts(m, ",dirstat"); 550 if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) 551 seq_puts(m, ",rbytes"); 552 if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) 553 seq_puts(m, ",noasyncreaddir"); 554 if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) 555 seq_puts(m, ",nodcache"); 556 if (fsopt->flags & CEPH_MOUNT_OPT_INO32) 557 seq_puts(m, ",ino32"); 558 if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { 559 seq_show_option(m, "fsc", fsopt->fscache_uniq); 560 } 561 if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) 562 seq_puts(m, ",nopoolperm"); 563 if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) 564 seq_puts(m, ",noquotadf"); 565 566 #ifdef CONFIG_CEPH_FS_POSIX_ACL 567 if (fsopt->sb_flags & SB_POSIXACL) 568 seq_puts(m, ",acl"); 569 else 570 seq_puts(m, ",noacl"); 571 #endif 572 573 if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) 574 seq_puts(m, ",copyfrom"); 575 576 if (fsopt->mds_namespace) 577 seq_show_option(m, "mds_namespace", fsopt->mds_namespace); 578 if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) 579 seq_printf(m, ",wsize=%d", fsopt->wsize); 580 if (fsopt->rsize != CEPH_MAX_READ_SIZE) 581 seq_printf(m, ",rsize=%d", fsopt->rsize); 582 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 583 seq_printf(m, ",rasize=%d", fsopt->rasize); 584 if (fsopt->congestion_kb != default_congestion_kb()) 585 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 586 if (fsopt->caps_max) 587 seq_printf(m, ",caps_max=%d", fsopt->caps_max); 588 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 589 seq_printf(m, ",caps_wanted_delay_min=%d", 590 fsopt->caps_wanted_delay_min); 591 if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) 592 seq_printf(m, ",caps_wanted_delay_max=%d", 593 fsopt->caps_wanted_delay_max); 594 if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) 595 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); 596 if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) 597 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); 598 if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) 599 seq_show_option(m, "snapdirname", fsopt->snapdir_name); 600 601 return 0; 602 } 603 604 /* 605 * handle any mon messages the standard library doesn't understand. 606 * return error if we don't either. 607 */ 608 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) 609 { 610 struct ceph_fs_client *fsc = client->private; 611 int type = le16_to_cpu(msg->hdr.type); 612 613 switch (type) { 614 case CEPH_MSG_MDS_MAP: 615 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); 616 return 0; 617 case CEPH_MSG_FS_MAP_USER: 618 ceph_mdsc_handle_fsmap(fsc->mdsc, msg); 619 return 0; 620 default: 621 return -1; 622 } 623 } 624 625 /* 626 * create a new fs client 627 * 628 * Success or not, this function consumes @fsopt and @opt. 629 */ 630 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 631 struct ceph_options *opt) 632 { 633 struct ceph_fs_client *fsc; 634 int page_count; 635 size_t size; 636 int err; 637 638 fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); 639 if (!fsc) { 640 err = -ENOMEM; 641 goto fail; 642 } 643 644 fsc->client = ceph_create_client(opt, fsc); 645 if (IS_ERR(fsc->client)) { 646 err = PTR_ERR(fsc->client); 647 goto fail; 648 } 649 opt = NULL; /* fsc->client now owns this */ 650 651 fsc->client->extra_mon_dispatch = extra_mon_dispatch; 652 ceph_set_opt(fsc->client, ABORT_ON_FULL); 653 654 if (!fsopt->mds_namespace) { 655 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 656 0, true); 657 } else { 658 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 659 0, false); 660 } 661 662 fsc->mount_options = fsopt; 663 664 fsc->sb = NULL; 665 fsc->mount_state = CEPH_MOUNT_MOUNTING; 666 667 atomic_long_set(&fsc->writeback_count, 0); 668 669 err = -ENOMEM; 670 /* 671 * The number of concurrent works can be high but they don't need 672 * to be processed in parallel, limit concurrency. 673 */ 674 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 675 if (!fsc->wb_wq) 676 goto fail_client; 677 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); 678 if (!fsc->pg_inv_wq) 679 goto fail_wb_wq; 680 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); 681 if (!fsc->trunc_wq) 682 goto fail_pg_inv_wq; 683 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 684 if (!fsc->cap_wq) 685 goto fail_trunc_wq; 686 687 /* set up mempools */ 688 err = -ENOMEM; 689 page_count = fsc->mount_options->wsize >> PAGE_SHIFT; 690 size = sizeof (struct page *) * (page_count ? page_count : 1); 691 fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); 692 if (!fsc->wb_pagevec_pool) 693 goto fail_cap_wq; 694 695 return fsc; 696 697 fail_cap_wq: 698 destroy_workqueue(fsc->cap_wq); 699 fail_trunc_wq: 700 destroy_workqueue(fsc->trunc_wq); 701 fail_pg_inv_wq: 702 destroy_workqueue(fsc->pg_inv_wq); 703 fail_wb_wq: 704 destroy_workqueue(fsc->wb_wq); 705 fail_client: 706 ceph_destroy_client(fsc->client); 707 fail: 708 kfree(fsc); 709 if (opt) 710 ceph_destroy_options(opt); 711 destroy_mount_options(fsopt); 712 return ERR_PTR(err); 713 } 714 715 static void flush_fs_workqueues(struct ceph_fs_client *fsc) 716 { 717 flush_workqueue(fsc->wb_wq); 718 flush_workqueue(fsc->pg_inv_wq); 719 flush_workqueue(fsc->trunc_wq); 720 flush_workqueue(fsc->cap_wq); 721 } 722 723 static void destroy_fs_client(struct ceph_fs_client *fsc) 724 { 725 dout("destroy_fs_client %p\n", fsc); 726 727 destroy_workqueue(fsc->wb_wq); 728 destroy_workqueue(fsc->pg_inv_wq); 729 destroy_workqueue(fsc->trunc_wq); 730 destroy_workqueue(fsc->cap_wq); 731 732 mempool_destroy(fsc->wb_pagevec_pool); 733 734 destroy_mount_options(fsc->mount_options); 735 736 ceph_destroy_client(fsc->client); 737 738 kfree(fsc); 739 dout("destroy_fs_client %p done\n", fsc); 740 } 741 742 /* 743 * caches 744 */ 745 struct kmem_cache *ceph_inode_cachep; 746 struct kmem_cache *ceph_cap_cachep; 747 struct kmem_cache *ceph_cap_flush_cachep; 748 struct kmem_cache *ceph_dentry_cachep; 749 struct kmem_cache *ceph_file_cachep; 750 struct kmem_cache *ceph_dir_file_cachep; 751 752 static void ceph_inode_init_once(void *foo) 753 { 754 struct ceph_inode_info *ci = foo; 755 inode_init_once(&ci->vfs_inode); 756 } 757 758 static int __init init_caches(void) 759 { 760 int error = -ENOMEM; 761 762 ceph_inode_cachep = kmem_cache_create("ceph_inode_info", 763 sizeof(struct ceph_inode_info), 764 __alignof__(struct ceph_inode_info), 765 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| 766 SLAB_ACCOUNT, ceph_inode_init_once); 767 if (!ceph_inode_cachep) 768 return -ENOMEM; 769 770 ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); 771 if (!ceph_cap_cachep) 772 goto bad_cap; 773 ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, 774 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 775 if (!ceph_cap_flush_cachep) 776 goto bad_cap_flush; 777 778 ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, 779 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); 780 if (!ceph_dentry_cachep) 781 goto bad_dentry; 782 783 ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); 784 if (!ceph_file_cachep) 785 goto bad_file; 786 787 ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); 788 if (!ceph_dir_file_cachep) 789 goto bad_dir_file; 790 791 error = ceph_fscache_register(); 792 if (error) 793 goto bad_fscache; 794 795 return 0; 796 797 bad_fscache: 798 kmem_cache_destroy(ceph_dir_file_cachep); 799 bad_dir_file: 800 kmem_cache_destroy(ceph_file_cachep); 801 bad_file: 802 kmem_cache_destroy(ceph_dentry_cachep); 803 bad_dentry: 804 kmem_cache_destroy(ceph_cap_flush_cachep); 805 bad_cap_flush: 806 kmem_cache_destroy(ceph_cap_cachep); 807 bad_cap: 808 kmem_cache_destroy(ceph_inode_cachep); 809 return error; 810 } 811 812 static void destroy_caches(void) 813 { 814 /* 815 * Make sure all delayed rcu free inodes are flushed before we 816 * destroy cache. 817 */ 818 rcu_barrier(); 819 820 kmem_cache_destroy(ceph_inode_cachep); 821 kmem_cache_destroy(ceph_cap_cachep); 822 kmem_cache_destroy(ceph_cap_flush_cachep); 823 kmem_cache_destroy(ceph_dentry_cachep); 824 kmem_cache_destroy(ceph_file_cachep); 825 kmem_cache_destroy(ceph_dir_file_cachep); 826 827 ceph_fscache_unregister(); 828 } 829 830 831 /* 832 * ceph_umount_begin - initiate forced umount. Tear down down the 833 * mount, skipping steps that may hang while waiting for server(s). 834 */ 835 static void ceph_umount_begin(struct super_block *sb) 836 { 837 struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 838 839 dout("ceph_umount_begin - starting forced umount\n"); 840 if (!fsc) 841 return; 842 fsc->mount_state = CEPH_MOUNT_SHUTDOWN; 843 ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); 844 ceph_mdsc_force_umount(fsc->mdsc); 845 return; 846 } 847 848 static int ceph_remount(struct super_block *sb, int *flags, char *data) 849 { 850 sync_filesystem(sb); 851 return 0; 852 } 853 854 static const struct super_operations ceph_super_ops = { 855 .alloc_inode = ceph_alloc_inode, 856 .destroy_inode = ceph_destroy_inode, 857 .free_inode = ceph_free_inode, 858 .write_inode = ceph_write_inode, 859 .drop_inode = ceph_drop_inode, 860 .sync_fs = ceph_sync_fs, 861 .put_super = ceph_put_super, 862 .remount_fs = ceph_remount, 863 .show_options = ceph_show_options, 864 .statfs = ceph_statfs, 865 .umount_begin = ceph_umount_begin, 866 }; 867 868 /* 869 * Bootstrap mount by opening the root directory. Note the mount 870 * @started time from caller, and time out if this takes too long. 871 */ 872 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, 873 const char *path, 874 unsigned long started) 875 { 876 struct ceph_mds_client *mdsc = fsc->mdsc; 877 struct ceph_mds_request *req = NULL; 878 int err; 879 struct dentry *root; 880 881 /* open dir */ 882 dout("open_root_inode opening '%s'\n", path); 883 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 884 if (IS_ERR(req)) 885 return ERR_CAST(req); 886 req->r_path1 = kstrdup(path, GFP_NOFS); 887 if (!req->r_path1) { 888 root = ERR_PTR(-ENOMEM); 889 goto out; 890 } 891 892 req->r_ino1.ino = CEPH_INO_ROOT; 893 req->r_ino1.snap = CEPH_NOSNAP; 894 req->r_started = started; 895 req->r_timeout = fsc->client->options->mount_timeout; 896 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 897 req->r_num_caps = 2; 898 err = ceph_mdsc_do_request(mdsc, NULL, req); 899 if (err == 0) { 900 struct inode *inode = req->r_target_inode; 901 req->r_target_inode = NULL; 902 dout("open_root_inode success\n"); 903 root = d_make_root(inode); 904 if (!root) { 905 root = ERR_PTR(-ENOMEM); 906 goto out; 907 } 908 dout("open_root_inode success, root dentry is %p\n", root); 909 } else { 910 root = ERR_PTR(err); 911 } 912 out: 913 ceph_mdsc_put_request(req); 914 return root; 915 } 916 917 918 919 920 /* 921 * mount: join the ceph cluster, and open root directory. 922 */ 923 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) 924 { 925 int err; 926 unsigned long started = jiffies; /* note the start time */ 927 struct dentry *root; 928 929 dout("mount start %p\n", fsc); 930 mutex_lock(&fsc->client->mount_mutex); 931 932 if (!fsc->sb->s_root) { 933 const char *path; 934 err = __ceph_open_session(fsc->client, started); 935 if (err < 0) 936 goto out; 937 938 /* setup fscache */ 939 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { 940 err = ceph_fscache_register_fs(fsc); 941 if (err < 0) 942 goto out; 943 } 944 945 if (!fsc->mount_options->server_path) { 946 path = ""; 947 dout("mount opening path \\t\n"); 948 } else { 949 path = fsc->mount_options->server_path + 1; 950 dout("mount opening path %s\n", path); 951 } 952 953 err = ceph_fs_debugfs_init(fsc); 954 if (err < 0) 955 goto out; 956 957 root = open_root_dentry(fsc, path, started); 958 if (IS_ERR(root)) { 959 err = PTR_ERR(root); 960 goto out; 961 } 962 fsc->sb->s_root = dget(root); 963 } else { 964 root = dget(fsc->sb->s_root); 965 } 966 967 fsc->mount_state = CEPH_MOUNT_MOUNTED; 968 dout("mount success\n"); 969 mutex_unlock(&fsc->client->mount_mutex); 970 return root; 971 972 out: 973 mutex_unlock(&fsc->client->mount_mutex); 974 return ERR_PTR(err); 975 } 976 977 static int ceph_set_super(struct super_block *s, void *data) 978 { 979 struct ceph_fs_client *fsc = data; 980 int ret; 981 982 dout("set_super %p data %p\n", s, data); 983 984 s->s_flags = fsc->mount_options->sb_flags; 985 s->s_maxbytes = MAX_LFS_FILESIZE; 986 987 s->s_xattr = ceph_xattr_handlers; 988 s->s_fs_info = fsc; 989 fsc->sb = s; 990 fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ 991 992 s->s_op = &ceph_super_ops; 993 s->s_d_op = &ceph_dentry_ops; 994 s->s_export_op = &ceph_export_ops; 995 996 s->s_time_gran = 1000; /* 1000 ns == 1 us */ 997 998 ret = set_anon_super(s, NULL); /* what is that second arg for? */ 999 if (ret != 0) 1000 goto fail; 1001 1002 return ret; 1003 1004 fail: 1005 s->s_fs_info = NULL; 1006 fsc->sb = NULL; 1007 return ret; 1008 } 1009 1010 /* 1011 * share superblock if same fs AND options 1012 */ 1013 static int ceph_compare_super(struct super_block *sb, void *data) 1014 { 1015 struct ceph_fs_client *new = data; 1016 struct ceph_mount_options *fsopt = new->mount_options; 1017 struct ceph_options *opt = new->client->options; 1018 struct ceph_fs_client *other = ceph_sb_to_client(sb); 1019 1020 dout("ceph_compare_super %p\n", sb); 1021 1022 if (compare_mount_options(fsopt, opt, other)) { 1023 dout("monitor(s)/mount options don't match\n"); 1024 return 0; 1025 } 1026 if ((opt->flags & CEPH_OPT_FSID) && 1027 ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { 1028 dout("fsid doesn't match\n"); 1029 return 0; 1030 } 1031 if (fsopt->sb_flags != other->mount_options->sb_flags) { 1032 dout("flags differ\n"); 1033 return 0; 1034 } 1035 return 1; 1036 } 1037 1038 /* 1039 * construct our own bdi so we can control readahead, etc. 1040 */ 1041 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); 1042 1043 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) 1044 { 1045 int err; 1046 1047 err = super_setup_bdi_name(sb, "ceph-%ld", 1048 atomic_long_inc_return(&bdi_seq)); 1049 if (err) 1050 return err; 1051 1052 /* set ra_pages based on rasize mount option? */ 1053 sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; 1054 1055 /* set io_pages based on max osd read size */ 1056 sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; 1057 1058 return 0; 1059 } 1060 1061 static struct dentry *ceph_mount(struct file_system_type *fs_type, 1062 int flags, const char *dev_name, void *data) 1063 { 1064 struct super_block *sb; 1065 struct ceph_fs_client *fsc; 1066 struct dentry *res; 1067 int err; 1068 int (*compare_super)(struct super_block *, void *) = ceph_compare_super; 1069 struct ceph_mount_options *fsopt = NULL; 1070 struct ceph_options *opt = NULL; 1071 1072 dout("ceph_mount\n"); 1073 1074 #ifdef CONFIG_CEPH_FS_POSIX_ACL 1075 flags |= SB_POSIXACL; 1076 #endif 1077 err = parse_mount_options(&fsopt, &opt, flags, data, dev_name); 1078 if (err < 0) { 1079 res = ERR_PTR(err); 1080 goto out_final; 1081 } 1082 1083 /* create client (which we may/may not use) */ 1084 fsc = create_fs_client(fsopt, opt); 1085 if (IS_ERR(fsc)) { 1086 res = ERR_CAST(fsc); 1087 goto out_final; 1088 } 1089 1090 err = ceph_mdsc_init(fsc); 1091 if (err < 0) { 1092 res = ERR_PTR(err); 1093 goto out; 1094 } 1095 1096 if (ceph_test_opt(fsc->client, NOSHARE)) 1097 compare_super = NULL; 1098 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); 1099 if (IS_ERR(sb)) { 1100 res = ERR_CAST(sb); 1101 goto out; 1102 } 1103 1104 if (ceph_sb_to_client(sb) != fsc) { 1105 ceph_mdsc_destroy(fsc); 1106 destroy_fs_client(fsc); 1107 fsc = ceph_sb_to_client(sb); 1108 dout("get_sb got existing client %p\n", fsc); 1109 } else { 1110 dout("get_sb using new client %p\n", fsc); 1111 err = ceph_setup_bdi(sb, fsc); 1112 if (err < 0) { 1113 res = ERR_PTR(err); 1114 goto out_splat; 1115 } 1116 } 1117 1118 res = ceph_real_mount(fsc); 1119 if (IS_ERR(res)) 1120 goto out_splat; 1121 dout("root %p inode %p ino %llx.%llx\n", res, 1122 d_inode(res), ceph_vinop(d_inode(res))); 1123 return res; 1124 1125 out_splat: 1126 ceph_mdsc_close_sessions(fsc->mdsc); 1127 deactivate_locked_super(sb); 1128 goto out_final; 1129 1130 out: 1131 ceph_mdsc_destroy(fsc); 1132 destroy_fs_client(fsc); 1133 out_final: 1134 dout("ceph_mount fail %ld\n", PTR_ERR(res)); 1135 return res; 1136 } 1137 1138 static void ceph_kill_sb(struct super_block *s) 1139 { 1140 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 1141 dev_t dev = s->s_dev; 1142 1143 dout("kill_sb %p\n", s); 1144 1145 ceph_mdsc_pre_umount(fsc->mdsc); 1146 flush_fs_workqueues(fsc); 1147 1148 generic_shutdown_super(s); 1149 1150 fsc->client->extra_mon_dispatch = NULL; 1151 ceph_fs_debugfs_cleanup(fsc); 1152 1153 ceph_fscache_unregister_fs(fsc); 1154 1155 ceph_mdsc_destroy(fsc); 1156 1157 destroy_fs_client(fsc); 1158 free_anon_bdev(dev); 1159 } 1160 1161 static struct file_system_type ceph_fs_type = { 1162 .owner = THIS_MODULE, 1163 .name = "ceph", 1164 .mount = ceph_mount, 1165 .kill_sb = ceph_kill_sb, 1166 .fs_flags = FS_RENAME_DOES_D_MOVE, 1167 }; 1168 MODULE_ALIAS_FS("ceph"); 1169 1170 static int __init init_ceph(void) 1171 { 1172 int ret = init_caches(); 1173 if (ret) 1174 goto out; 1175 1176 ceph_flock_init(); 1177 ceph_xattr_init(); 1178 ret = register_filesystem(&ceph_fs_type); 1179 if (ret) 1180 goto out_xattr; 1181 1182 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); 1183 1184 return 0; 1185 1186 out_xattr: 1187 ceph_xattr_exit(); 1188 destroy_caches(); 1189 out: 1190 return ret; 1191 } 1192 1193 static void __exit exit_ceph(void) 1194 { 1195 dout("exit_ceph\n"); 1196 unregister_filesystem(&ceph_fs_type); 1197 ceph_xattr_exit(); 1198 destroy_caches(); 1199 } 1200 1201 module_init(init_ceph); 1202 module_exit(exit_ceph); 1203 1204 MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 1205 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 1206 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); 1207 MODULE_DESCRIPTION("Ceph filesystem for Linux"); 1208 MODULE_LICENSE("GPL"); 1209