1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/slab.h> 13 #include <linux/file.h> 14 #include <linux/seq_file.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/moduleparam.h> 18 #include <linux/parser.h> 19 #include <linux/statfs.h> 20 #include <linux/random.h> 21 #include <linux/sched.h> 22 #include <linux/exportfs.h> 23 #include <linux/posix_acl.h> 24 25 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 26 MODULE_DESCRIPTION("Filesystem in Userspace"); 27 MODULE_LICENSE("GPL"); 28 29 static struct kmem_cache *fuse_inode_cachep; 30 struct list_head fuse_conn_list; 31 DEFINE_MUTEX(fuse_mutex); 32 33 static int set_global_limit(const char *val, struct kernel_param *kp); 34 35 unsigned max_user_bgreq; 36 module_param_call(max_user_bgreq, set_global_limit, param_get_uint, 37 &max_user_bgreq, 0644); 38 __MODULE_PARM_TYPE(max_user_bgreq, "uint"); 39 MODULE_PARM_DESC(max_user_bgreq, 40 "Global limit for the maximum number of backgrounded requests an " 41 "unprivileged user can set"); 42 43 unsigned max_user_congthresh; 44 module_param_call(max_user_congthresh, set_global_limit, param_get_uint, 45 &max_user_congthresh, 0644); 46 __MODULE_PARM_TYPE(max_user_congthresh, "uint"); 47 MODULE_PARM_DESC(max_user_congthresh, 48 "Global limit for the maximum congestion threshold an " 49 "unprivileged user can set"); 50 51 #define FUSE_SUPER_MAGIC 0x65735546 52 53 #define FUSE_DEFAULT_BLKSIZE 512 54 55 /** Maximum number of outstanding background requests */ 56 #define FUSE_DEFAULT_MAX_BACKGROUND 12 57 58 /** Congestion starts at 75% of maximum */ 59 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) 60 61 struct fuse_mount_data { 62 int fd; 63 unsigned rootmode; 64 kuid_t user_id; 65 kgid_t group_id; 66 unsigned fd_present:1; 67 unsigned rootmode_present:1; 68 unsigned user_id_present:1; 69 unsigned group_id_present:1; 70 unsigned default_permissions:1; 71 unsigned allow_other:1; 72 unsigned max_read; 73 unsigned blksize; 74 }; 75 76 struct fuse_forget_link *fuse_alloc_forget(void) 77 { 78 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); 79 } 80 81 static struct inode *fuse_alloc_inode(struct super_block *sb) 82 { 83 struct inode *inode; 84 struct fuse_inode *fi; 85 86 inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); 87 if (!inode) 88 return NULL; 89 90 fi = get_fuse_inode(inode); 91 fi->i_time = 0; 92 fi->nodeid = 0; 93 fi->nlookup = 0; 94 fi->attr_version = 0; 95 fi->writectr = 0; 96 fi->orig_ino = 0; 97 fi->state = 0; 98 INIT_LIST_HEAD(&fi->write_files); 99 INIT_LIST_HEAD(&fi->queued_writes); 100 INIT_LIST_HEAD(&fi->writepages); 101 init_waitqueue_head(&fi->page_waitq); 102 mutex_init(&fi->mutex); 103 fi->forget = fuse_alloc_forget(); 104 if (!fi->forget) { 105 kmem_cache_free(fuse_inode_cachep, inode); 106 return NULL; 107 } 108 109 return inode; 110 } 111 112 static void fuse_i_callback(struct rcu_head *head) 113 { 114 struct inode *inode = container_of(head, struct inode, i_rcu); 115 kmem_cache_free(fuse_inode_cachep, inode); 116 } 117 118 static void fuse_destroy_inode(struct inode *inode) 119 { 120 struct fuse_inode *fi = get_fuse_inode(inode); 121 BUG_ON(!list_empty(&fi->write_files)); 122 BUG_ON(!list_empty(&fi->queued_writes)); 123 mutex_destroy(&fi->mutex); 124 kfree(fi->forget); 125 call_rcu(&inode->i_rcu, fuse_i_callback); 126 } 127 128 static void fuse_evict_inode(struct inode *inode) 129 { 130 truncate_inode_pages_final(&inode->i_data); 131 clear_inode(inode); 132 if (inode->i_sb->s_flags & MS_ACTIVE) { 133 struct fuse_conn *fc = get_fuse_conn(inode); 134 struct fuse_inode *fi = get_fuse_inode(inode); 135 fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup); 136 fi->forget = NULL; 137 } 138 } 139 140 static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) 141 { 142 sync_filesystem(sb); 143 if (*flags & MS_MANDLOCK) 144 return -EINVAL; 145 146 return 0; 147 } 148 149 /* 150 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down 151 * so that it will fit. 152 */ 153 static ino_t fuse_squash_ino(u64 ino64) 154 { 155 ino_t ino = (ino_t) ino64; 156 if (sizeof(ino_t) < sizeof(u64)) 157 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; 158 return ino; 159 } 160 161 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 162 u64 attr_valid) 163 { 164 struct fuse_conn *fc = get_fuse_conn(inode); 165 struct fuse_inode *fi = get_fuse_inode(inode); 166 167 fi->attr_version = ++fc->attr_version; 168 fi->i_time = attr_valid; 169 170 inode->i_ino = fuse_squash_ino(attr->ino); 171 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 172 set_nlink(inode, attr->nlink); 173 inode->i_uid = make_kuid(&init_user_ns, attr->uid); 174 inode->i_gid = make_kgid(&init_user_ns, attr->gid); 175 inode->i_blocks = attr->blocks; 176 inode->i_atime.tv_sec = attr->atime; 177 inode->i_atime.tv_nsec = attr->atimensec; 178 /* mtime from server may be stale due to local buffered write */ 179 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { 180 inode->i_mtime.tv_sec = attr->mtime; 181 inode->i_mtime.tv_nsec = attr->mtimensec; 182 inode->i_ctime.tv_sec = attr->ctime; 183 inode->i_ctime.tv_nsec = attr->ctimensec; 184 } 185 186 if (attr->blksize != 0) 187 inode->i_blkbits = ilog2(attr->blksize); 188 else 189 inode->i_blkbits = inode->i_sb->s_blocksize_bits; 190 191 /* 192 * Don't set the sticky bit in i_mode, unless we want the VFS 193 * to check permissions. This prevents failures due to the 194 * check in may_delete(). 195 */ 196 fi->orig_i_mode = inode->i_mode; 197 if (!fc->default_permissions) 198 inode->i_mode &= ~S_ISVTX; 199 200 fi->orig_ino = attr->ino; 201 } 202 203 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 204 u64 attr_valid, u64 attr_version) 205 { 206 struct fuse_conn *fc = get_fuse_conn(inode); 207 struct fuse_inode *fi = get_fuse_inode(inode); 208 bool is_wb = fc->writeback_cache; 209 loff_t oldsize; 210 struct timespec old_mtime; 211 212 spin_lock(&fc->lock); 213 if ((attr_version != 0 && fi->attr_version > attr_version) || 214 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 215 spin_unlock(&fc->lock); 216 return; 217 } 218 219 old_mtime = inode->i_mtime; 220 fuse_change_attributes_common(inode, attr, attr_valid); 221 222 oldsize = inode->i_size; 223 /* 224 * In case of writeback_cache enabled, the cached writes beyond EOF 225 * extend local i_size without keeping userspace server in sync. So, 226 * attr->size coming from server can be stale. We cannot trust it. 227 */ 228 if (!is_wb || !S_ISREG(inode->i_mode)) 229 i_size_write(inode, attr->size); 230 spin_unlock(&fc->lock); 231 232 if (!is_wb && S_ISREG(inode->i_mode)) { 233 bool inval = false; 234 235 if (oldsize != attr->size) { 236 truncate_pagecache(inode, attr->size); 237 inval = true; 238 } else if (fc->auto_inval_data) { 239 struct timespec new_mtime = { 240 .tv_sec = attr->mtime, 241 .tv_nsec = attr->mtimensec, 242 }; 243 244 /* 245 * Auto inval mode also checks and invalidates if mtime 246 * has changed. 247 */ 248 if (!timespec_equal(&old_mtime, &new_mtime)) 249 inval = true; 250 } 251 252 if (inval) 253 invalidate_inode_pages2(inode->i_mapping); 254 } 255 } 256 257 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) 258 { 259 inode->i_mode = attr->mode & S_IFMT; 260 inode->i_size = attr->size; 261 inode->i_mtime.tv_sec = attr->mtime; 262 inode->i_mtime.tv_nsec = attr->mtimensec; 263 inode->i_ctime.tv_sec = attr->ctime; 264 inode->i_ctime.tv_nsec = attr->ctimensec; 265 if (S_ISREG(inode->i_mode)) { 266 fuse_init_common(inode); 267 fuse_init_file_inode(inode); 268 } else if (S_ISDIR(inode->i_mode)) 269 fuse_init_dir(inode); 270 else if (S_ISLNK(inode->i_mode)) 271 fuse_init_symlink(inode); 272 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 273 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 274 fuse_init_common(inode); 275 init_special_inode(inode, inode->i_mode, 276 new_decode_dev(attr->rdev)); 277 } else 278 BUG(); 279 } 280 281 int fuse_inode_eq(struct inode *inode, void *_nodeidp) 282 { 283 u64 nodeid = *(u64 *) _nodeidp; 284 if (get_node_id(inode) == nodeid) 285 return 1; 286 else 287 return 0; 288 } 289 290 static int fuse_inode_set(struct inode *inode, void *_nodeidp) 291 { 292 u64 nodeid = *(u64 *) _nodeidp; 293 get_fuse_inode(inode)->nodeid = nodeid; 294 return 0; 295 } 296 297 struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 298 int generation, struct fuse_attr *attr, 299 u64 attr_valid, u64 attr_version) 300 { 301 struct inode *inode; 302 struct fuse_inode *fi; 303 struct fuse_conn *fc = get_fuse_conn_super(sb); 304 305 retry: 306 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 307 if (!inode) 308 return NULL; 309 310 if ((inode->i_state & I_NEW)) { 311 inode->i_flags |= S_NOATIME; 312 if (!fc->writeback_cache || !S_ISREG(attr->mode)) 313 inode->i_flags |= S_NOCMTIME; 314 inode->i_generation = generation; 315 fuse_init_inode(inode, attr); 316 unlock_new_inode(inode); 317 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { 318 /* Inode has changed type, any I/O on the old should fail */ 319 make_bad_inode(inode); 320 iput(inode); 321 goto retry; 322 } 323 324 fi = get_fuse_inode(inode); 325 spin_lock(&fc->lock); 326 fi->nlookup++; 327 spin_unlock(&fc->lock); 328 fuse_change_attributes(inode, attr, attr_valid, attr_version); 329 330 return inode; 331 } 332 333 int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, 334 loff_t offset, loff_t len) 335 { 336 struct inode *inode; 337 pgoff_t pg_start; 338 pgoff_t pg_end; 339 340 inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); 341 if (!inode) 342 return -ENOENT; 343 344 fuse_invalidate_attr(inode); 345 forget_all_cached_acls(inode); 346 if (offset >= 0) { 347 pg_start = offset >> PAGE_SHIFT; 348 if (len <= 0) 349 pg_end = -1; 350 else 351 pg_end = (offset + len - 1) >> PAGE_SHIFT; 352 invalidate_inode_pages2_range(inode->i_mapping, 353 pg_start, pg_end); 354 } 355 iput(inode); 356 return 0; 357 } 358 359 void fuse_lock_inode(struct inode *inode) 360 { 361 if (!get_fuse_conn(inode)->parallel_dirops) 362 mutex_lock(&get_fuse_inode(inode)->mutex); 363 } 364 365 void fuse_unlock_inode(struct inode *inode) 366 { 367 if (!get_fuse_conn(inode)->parallel_dirops) 368 mutex_unlock(&get_fuse_inode(inode)->mutex); 369 } 370 371 static void fuse_umount_begin(struct super_block *sb) 372 { 373 fuse_abort_conn(get_fuse_conn_super(sb)); 374 } 375 376 static void fuse_send_destroy(struct fuse_conn *fc) 377 { 378 struct fuse_req *req = fc->destroy_req; 379 if (req && fc->conn_init) { 380 fc->destroy_req = NULL; 381 req->in.h.opcode = FUSE_DESTROY; 382 __set_bit(FR_FORCE, &req->flags); 383 __clear_bit(FR_BACKGROUND, &req->flags); 384 fuse_request_send(fc, req); 385 fuse_put_request(fc, req); 386 } 387 } 388 389 static void fuse_put_super(struct super_block *sb) 390 { 391 struct fuse_conn *fc = get_fuse_conn_super(sb); 392 393 fuse_send_destroy(fc); 394 395 fuse_abort_conn(fc); 396 mutex_lock(&fuse_mutex); 397 list_del(&fc->entry); 398 fuse_ctl_remove_conn(fc); 399 mutex_unlock(&fuse_mutex); 400 401 fuse_conn_put(fc); 402 } 403 404 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 405 { 406 stbuf->f_type = FUSE_SUPER_MAGIC; 407 stbuf->f_bsize = attr->bsize; 408 stbuf->f_frsize = attr->frsize; 409 stbuf->f_blocks = attr->blocks; 410 stbuf->f_bfree = attr->bfree; 411 stbuf->f_bavail = attr->bavail; 412 stbuf->f_files = attr->files; 413 stbuf->f_ffree = attr->ffree; 414 stbuf->f_namelen = attr->namelen; 415 /* fsid is left zero */ 416 } 417 418 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 419 { 420 struct super_block *sb = dentry->d_sb; 421 struct fuse_conn *fc = get_fuse_conn_super(sb); 422 FUSE_ARGS(args); 423 struct fuse_statfs_out outarg; 424 int err; 425 426 if (!fuse_allow_current_process(fc)) { 427 buf->f_type = FUSE_SUPER_MAGIC; 428 return 0; 429 } 430 431 memset(&outarg, 0, sizeof(outarg)); 432 args.in.numargs = 0; 433 args.in.h.opcode = FUSE_STATFS; 434 args.in.h.nodeid = get_node_id(d_inode(dentry)); 435 args.out.numargs = 1; 436 args.out.args[0].size = sizeof(outarg); 437 args.out.args[0].value = &outarg; 438 err = fuse_simple_request(fc, &args); 439 if (!err) 440 convert_fuse_statfs(buf, &outarg.st); 441 return err; 442 } 443 444 enum { 445 OPT_FD, 446 OPT_ROOTMODE, 447 OPT_USER_ID, 448 OPT_GROUP_ID, 449 OPT_DEFAULT_PERMISSIONS, 450 OPT_ALLOW_OTHER, 451 OPT_MAX_READ, 452 OPT_BLKSIZE, 453 OPT_ERR 454 }; 455 456 static const match_table_t tokens = { 457 {OPT_FD, "fd=%u"}, 458 {OPT_ROOTMODE, "rootmode=%o"}, 459 {OPT_USER_ID, "user_id=%u"}, 460 {OPT_GROUP_ID, "group_id=%u"}, 461 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 462 {OPT_ALLOW_OTHER, "allow_other"}, 463 {OPT_MAX_READ, "max_read=%u"}, 464 {OPT_BLKSIZE, "blksize=%u"}, 465 {OPT_ERR, NULL} 466 }; 467 468 static int fuse_match_uint(substring_t *s, unsigned int *res) 469 { 470 int err = -ENOMEM; 471 char *buf = match_strdup(s); 472 if (buf) { 473 err = kstrtouint(buf, 10, res); 474 kfree(buf); 475 } 476 return err; 477 } 478 479 static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) 480 { 481 char *p; 482 memset(d, 0, sizeof(struct fuse_mount_data)); 483 d->max_read = ~0; 484 d->blksize = FUSE_DEFAULT_BLKSIZE; 485 486 while ((p = strsep(&opt, ",")) != NULL) { 487 int token; 488 int value; 489 unsigned uv; 490 substring_t args[MAX_OPT_ARGS]; 491 if (!*p) 492 continue; 493 494 token = match_token(p, tokens, args); 495 switch (token) { 496 case OPT_FD: 497 if (match_int(&args[0], &value)) 498 return 0; 499 d->fd = value; 500 d->fd_present = 1; 501 break; 502 503 case OPT_ROOTMODE: 504 if (match_octal(&args[0], &value)) 505 return 0; 506 if (!fuse_valid_type(value)) 507 return 0; 508 d->rootmode = value; 509 d->rootmode_present = 1; 510 break; 511 512 case OPT_USER_ID: 513 if (fuse_match_uint(&args[0], &uv)) 514 return 0; 515 d->user_id = make_kuid(current_user_ns(), uv); 516 if (!uid_valid(d->user_id)) 517 return 0; 518 d->user_id_present = 1; 519 break; 520 521 case OPT_GROUP_ID: 522 if (fuse_match_uint(&args[0], &uv)) 523 return 0; 524 d->group_id = make_kgid(current_user_ns(), uv); 525 if (!gid_valid(d->group_id)) 526 return 0; 527 d->group_id_present = 1; 528 break; 529 530 case OPT_DEFAULT_PERMISSIONS: 531 d->default_permissions = 1; 532 break; 533 534 case OPT_ALLOW_OTHER: 535 d->allow_other = 1; 536 break; 537 538 case OPT_MAX_READ: 539 if (match_int(&args[0], &value)) 540 return 0; 541 d->max_read = value; 542 break; 543 544 case OPT_BLKSIZE: 545 if (!is_bdev || match_int(&args[0], &value)) 546 return 0; 547 d->blksize = value; 548 break; 549 550 default: 551 return 0; 552 } 553 } 554 555 if (!d->fd_present || !d->rootmode_present || 556 !d->user_id_present || !d->group_id_present) 557 return 0; 558 559 return 1; 560 } 561 562 static int fuse_show_options(struct seq_file *m, struct dentry *root) 563 { 564 struct super_block *sb = root->d_sb; 565 struct fuse_conn *fc = get_fuse_conn_super(sb); 566 567 seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id)); 568 seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id)); 569 if (fc->default_permissions) 570 seq_puts(m, ",default_permissions"); 571 if (fc->allow_other) 572 seq_puts(m, ",allow_other"); 573 if (fc->max_read != ~0) 574 seq_printf(m, ",max_read=%u", fc->max_read); 575 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 576 seq_printf(m, ",blksize=%lu", sb->s_blocksize); 577 return 0; 578 } 579 580 static void fuse_iqueue_init(struct fuse_iqueue *fiq) 581 { 582 memset(fiq, 0, sizeof(struct fuse_iqueue)); 583 init_waitqueue_head(&fiq->waitq); 584 INIT_LIST_HEAD(&fiq->pending); 585 INIT_LIST_HEAD(&fiq->interrupts); 586 fiq->forget_list_tail = &fiq->forget_list_head; 587 fiq->connected = 1; 588 } 589 590 static void fuse_pqueue_init(struct fuse_pqueue *fpq) 591 { 592 memset(fpq, 0, sizeof(struct fuse_pqueue)); 593 spin_lock_init(&fpq->lock); 594 INIT_LIST_HEAD(&fpq->processing); 595 INIT_LIST_HEAD(&fpq->io); 596 fpq->connected = 1; 597 } 598 599 void fuse_conn_init(struct fuse_conn *fc) 600 { 601 memset(fc, 0, sizeof(*fc)); 602 spin_lock_init(&fc->lock); 603 init_rwsem(&fc->killsb); 604 atomic_set(&fc->count, 1); 605 atomic_set(&fc->dev_count, 1); 606 init_waitqueue_head(&fc->blocked_waitq); 607 init_waitqueue_head(&fc->reserved_req_waitq); 608 fuse_iqueue_init(&fc->iq); 609 INIT_LIST_HEAD(&fc->bg_queue); 610 INIT_LIST_HEAD(&fc->entry); 611 INIT_LIST_HEAD(&fc->devices); 612 atomic_set(&fc->num_waiting, 0); 613 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 614 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 615 fc->khctr = 0; 616 fc->polled_files = RB_ROOT; 617 fc->blocked = 0; 618 fc->initialized = 0; 619 fc->connected = 1; 620 fc->attr_version = 1; 621 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 622 } 623 EXPORT_SYMBOL_GPL(fuse_conn_init); 624 625 void fuse_conn_put(struct fuse_conn *fc) 626 { 627 if (atomic_dec_and_test(&fc->count)) { 628 if (fc->destroy_req) 629 fuse_request_free(fc->destroy_req); 630 fc->release(fc); 631 } 632 } 633 EXPORT_SYMBOL_GPL(fuse_conn_put); 634 635 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 636 { 637 atomic_inc(&fc->count); 638 return fc; 639 } 640 EXPORT_SYMBOL_GPL(fuse_conn_get); 641 642 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 643 { 644 struct fuse_attr attr; 645 memset(&attr, 0, sizeof(attr)); 646 647 attr.mode = mode; 648 attr.ino = FUSE_ROOT_ID; 649 attr.nlink = 1; 650 return fuse_iget(sb, 1, 0, &attr, 0, 0); 651 } 652 653 struct fuse_inode_handle { 654 u64 nodeid; 655 u32 generation; 656 }; 657 658 static struct dentry *fuse_get_dentry(struct super_block *sb, 659 struct fuse_inode_handle *handle) 660 { 661 struct fuse_conn *fc = get_fuse_conn_super(sb); 662 struct inode *inode; 663 struct dentry *entry; 664 int err = -ESTALE; 665 666 if (handle->nodeid == 0) 667 goto out_err; 668 669 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); 670 if (!inode) { 671 struct fuse_entry_out outarg; 672 const struct qstr name = QSTR_INIT(".", 1); 673 674 if (!fc->export_support) 675 goto out_err; 676 677 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, 678 &inode); 679 if (err && err != -ENOENT) 680 goto out_err; 681 if (err || !inode) { 682 err = -ESTALE; 683 goto out_err; 684 } 685 err = -EIO; 686 if (get_node_id(inode) != handle->nodeid) 687 goto out_iput; 688 } 689 err = -ESTALE; 690 if (inode->i_generation != handle->generation) 691 goto out_iput; 692 693 entry = d_obtain_alias(inode); 694 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) 695 fuse_invalidate_entry_cache(entry); 696 697 return entry; 698 699 out_iput: 700 iput(inode); 701 out_err: 702 return ERR_PTR(err); 703 } 704 705 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, 706 struct inode *parent) 707 { 708 int len = parent ? 6 : 3; 709 u64 nodeid; 710 u32 generation; 711 712 if (*max_len < len) { 713 *max_len = len; 714 return FILEID_INVALID; 715 } 716 717 nodeid = get_fuse_inode(inode)->nodeid; 718 generation = inode->i_generation; 719 720 fh[0] = (u32)(nodeid >> 32); 721 fh[1] = (u32)(nodeid & 0xffffffff); 722 fh[2] = generation; 723 724 if (parent) { 725 nodeid = get_fuse_inode(parent)->nodeid; 726 generation = parent->i_generation; 727 728 fh[3] = (u32)(nodeid >> 32); 729 fh[4] = (u32)(nodeid & 0xffffffff); 730 fh[5] = generation; 731 } 732 733 *max_len = len; 734 return parent ? 0x82 : 0x81; 735 } 736 737 static struct dentry *fuse_fh_to_dentry(struct super_block *sb, 738 struct fid *fid, int fh_len, int fh_type) 739 { 740 struct fuse_inode_handle handle; 741 742 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) 743 return NULL; 744 745 handle.nodeid = (u64) fid->raw[0] << 32; 746 handle.nodeid |= (u64) fid->raw[1]; 747 handle.generation = fid->raw[2]; 748 return fuse_get_dentry(sb, &handle); 749 } 750 751 static struct dentry *fuse_fh_to_parent(struct super_block *sb, 752 struct fid *fid, int fh_len, int fh_type) 753 { 754 struct fuse_inode_handle parent; 755 756 if (fh_type != 0x82 || fh_len < 6) 757 return NULL; 758 759 parent.nodeid = (u64) fid->raw[3] << 32; 760 parent.nodeid |= (u64) fid->raw[4]; 761 parent.generation = fid->raw[5]; 762 return fuse_get_dentry(sb, &parent); 763 } 764 765 static struct dentry *fuse_get_parent(struct dentry *child) 766 { 767 struct inode *child_inode = d_inode(child); 768 struct fuse_conn *fc = get_fuse_conn(child_inode); 769 struct inode *inode; 770 struct dentry *parent; 771 struct fuse_entry_out outarg; 772 const struct qstr name = QSTR_INIT("..", 2); 773 int err; 774 775 if (!fc->export_support) 776 return ERR_PTR(-ESTALE); 777 778 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 779 &name, &outarg, &inode); 780 if (err) { 781 if (err == -ENOENT) 782 return ERR_PTR(-ESTALE); 783 return ERR_PTR(err); 784 } 785 786 parent = d_obtain_alias(inode); 787 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) 788 fuse_invalidate_entry_cache(parent); 789 790 return parent; 791 } 792 793 static const struct export_operations fuse_export_operations = { 794 .fh_to_dentry = fuse_fh_to_dentry, 795 .fh_to_parent = fuse_fh_to_parent, 796 .encode_fh = fuse_encode_fh, 797 .get_parent = fuse_get_parent, 798 }; 799 800 static const struct super_operations fuse_super_operations = { 801 .alloc_inode = fuse_alloc_inode, 802 .destroy_inode = fuse_destroy_inode, 803 .evict_inode = fuse_evict_inode, 804 .write_inode = fuse_write_inode, 805 .drop_inode = generic_delete_inode, 806 .remount_fs = fuse_remount_fs, 807 .put_super = fuse_put_super, 808 .umount_begin = fuse_umount_begin, 809 .statfs = fuse_statfs, 810 .show_options = fuse_show_options, 811 }; 812 813 static void sanitize_global_limit(unsigned *limit) 814 { 815 if (*limit == 0) 816 *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / 817 sizeof(struct fuse_req); 818 819 if (*limit >= 1 << 16) 820 *limit = (1 << 16) - 1; 821 } 822 823 static int set_global_limit(const char *val, struct kernel_param *kp) 824 { 825 int rv; 826 827 rv = param_set_uint(val, kp); 828 if (rv) 829 return rv; 830 831 sanitize_global_limit((unsigned *)kp->arg); 832 833 return 0; 834 } 835 836 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) 837 { 838 int cap_sys_admin = capable(CAP_SYS_ADMIN); 839 840 if (arg->minor < 13) 841 return; 842 843 sanitize_global_limit(&max_user_bgreq); 844 sanitize_global_limit(&max_user_congthresh); 845 846 if (arg->max_background) { 847 fc->max_background = arg->max_background; 848 849 if (!cap_sys_admin && fc->max_background > max_user_bgreq) 850 fc->max_background = max_user_bgreq; 851 } 852 if (arg->congestion_threshold) { 853 fc->congestion_threshold = arg->congestion_threshold; 854 855 if (!cap_sys_admin && 856 fc->congestion_threshold > max_user_congthresh) 857 fc->congestion_threshold = max_user_congthresh; 858 } 859 } 860 861 static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 862 { 863 struct fuse_init_out *arg = &req->misc.init_out; 864 865 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 866 fc->conn_error = 1; 867 else { 868 unsigned long ra_pages; 869 870 process_init_limits(fc, arg); 871 872 if (arg->minor >= 6) { 873 ra_pages = arg->max_readahead / PAGE_SIZE; 874 if (arg->flags & FUSE_ASYNC_READ) 875 fc->async_read = 1; 876 if (!(arg->flags & FUSE_POSIX_LOCKS)) 877 fc->no_lock = 1; 878 if (arg->minor >= 17) { 879 if (!(arg->flags & FUSE_FLOCK_LOCKS)) 880 fc->no_flock = 1; 881 } else { 882 if (!(arg->flags & FUSE_POSIX_LOCKS)) 883 fc->no_flock = 1; 884 } 885 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 886 fc->atomic_o_trunc = 1; 887 if (arg->minor >= 9) { 888 /* LOOKUP has dependency on proto version */ 889 if (arg->flags & FUSE_EXPORT_SUPPORT) 890 fc->export_support = 1; 891 } 892 if (arg->flags & FUSE_BIG_WRITES) 893 fc->big_writes = 1; 894 if (arg->flags & FUSE_DONT_MASK) 895 fc->dont_mask = 1; 896 if (arg->flags & FUSE_AUTO_INVAL_DATA) 897 fc->auto_inval_data = 1; 898 if (arg->flags & FUSE_DO_READDIRPLUS) { 899 fc->do_readdirplus = 1; 900 if (arg->flags & FUSE_READDIRPLUS_AUTO) 901 fc->readdirplus_auto = 1; 902 } 903 if (arg->flags & FUSE_ASYNC_DIO) 904 fc->async_dio = 1; 905 if (arg->flags & FUSE_WRITEBACK_CACHE) 906 fc->writeback_cache = 1; 907 if (arg->flags & FUSE_PARALLEL_DIROPS) 908 fc->parallel_dirops = 1; 909 if (arg->flags & FUSE_HANDLE_KILLPRIV) 910 fc->handle_killpriv = 1; 911 if (arg->time_gran && arg->time_gran <= 1000000000) 912 fc->sb->s_time_gran = arg->time_gran; 913 if ((arg->flags & FUSE_POSIX_ACL)) { 914 fc->default_permissions = 1; 915 fc->posix_acl = 1; 916 fc->sb->s_xattr = fuse_acl_xattr_handlers; 917 } 918 } else { 919 ra_pages = fc->max_read / PAGE_SIZE; 920 fc->no_lock = 1; 921 fc->no_flock = 1; 922 } 923 924 fc->sb->s_bdi->ra_pages = 925 min(fc->sb->s_bdi->ra_pages, ra_pages); 926 fc->minor = arg->minor; 927 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 928 fc->max_write = max_t(unsigned, 4096, fc->max_write); 929 fc->conn_init = 1; 930 } 931 fuse_set_initialized(fc); 932 wake_up_all(&fc->blocked_waitq); 933 } 934 935 static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) 936 { 937 struct fuse_init_in *arg = &req->misc.init_in; 938 939 arg->major = FUSE_KERNEL_VERSION; 940 arg->minor = FUSE_KERNEL_MINOR_VERSION; 941 arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; 942 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 943 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 944 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 945 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 946 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 947 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | 948 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL; 949 req->in.h.opcode = FUSE_INIT; 950 req->in.numargs = 1; 951 req->in.args[0].size = sizeof(*arg); 952 req->in.args[0].value = arg; 953 req->out.numargs = 1; 954 /* Variable length argument used for backward compatibility 955 with interface version < 7.5. Rest of init_out is zeroed 956 by do_get_request(), so a short reply is not a problem */ 957 req->out.argvar = 1; 958 req->out.args[0].size = sizeof(struct fuse_init_out); 959 req->out.args[0].value = &req->misc.init_out; 960 req->end = process_init_reply; 961 fuse_request_send_background(fc, req); 962 } 963 964 static void fuse_free_conn(struct fuse_conn *fc) 965 { 966 WARN_ON(!list_empty(&fc->devices)); 967 kfree_rcu(fc, rcu); 968 } 969 970 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 971 { 972 int err; 973 char *suffix = ""; 974 975 if (sb->s_bdev) 976 suffix = "-fuseblk"; 977 err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), 978 MINOR(fc->dev), suffix); 979 if (err) 980 return err; 981 982 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; 983 /* fuse does it's own writeback accounting */ 984 sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; 985 986 /* 987 * For a single fuse filesystem use max 1% of dirty + 988 * writeback threshold. 989 * 990 * This gives about 1M of write buffer for memory maps on a 991 * machine with 1G and 10% dirty_ratio, which should be more 992 * than enough. 993 * 994 * Privileged users can raise it by writing to 995 * 996 * /sys/class/bdi/<bdi>/max_ratio 997 */ 998 bdi_set_max_ratio(sb->s_bdi, 1); 999 1000 return 0; 1001 } 1002 1003 struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) 1004 { 1005 struct fuse_dev *fud; 1006 1007 fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); 1008 if (fud) { 1009 fud->fc = fuse_conn_get(fc); 1010 fuse_pqueue_init(&fud->pq); 1011 1012 spin_lock(&fc->lock); 1013 list_add_tail(&fud->entry, &fc->devices); 1014 spin_unlock(&fc->lock); 1015 } 1016 1017 return fud; 1018 } 1019 EXPORT_SYMBOL_GPL(fuse_dev_alloc); 1020 1021 void fuse_dev_free(struct fuse_dev *fud) 1022 { 1023 struct fuse_conn *fc = fud->fc; 1024 1025 if (fc) { 1026 spin_lock(&fc->lock); 1027 list_del(&fud->entry); 1028 spin_unlock(&fc->lock); 1029 1030 fuse_conn_put(fc); 1031 } 1032 kfree(fud); 1033 } 1034 EXPORT_SYMBOL_GPL(fuse_dev_free); 1035 1036 static int fuse_fill_super(struct super_block *sb, void *data, int silent) 1037 { 1038 struct fuse_dev *fud; 1039 struct fuse_conn *fc; 1040 struct inode *root; 1041 struct fuse_mount_data d; 1042 struct file *file; 1043 struct dentry *root_dentry; 1044 struct fuse_req *init_req; 1045 int err; 1046 int is_bdev = sb->s_bdev != NULL; 1047 1048 err = -EINVAL; 1049 if (sb->s_flags & MS_MANDLOCK) 1050 goto err; 1051 1052 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); 1053 1054 if (!parse_fuse_opt(data, &d, is_bdev)) 1055 goto err; 1056 1057 if (is_bdev) { 1058 #ifdef CONFIG_BLOCK 1059 err = -EINVAL; 1060 if (!sb_set_blocksize(sb, d.blksize)) 1061 goto err; 1062 #endif 1063 } else { 1064 sb->s_blocksize = PAGE_SIZE; 1065 sb->s_blocksize_bits = PAGE_SHIFT; 1066 } 1067 sb->s_magic = FUSE_SUPER_MAGIC; 1068 sb->s_op = &fuse_super_operations; 1069 sb->s_xattr = fuse_xattr_handlers; 1070 sb->s_maxbytes = MAX_LFS_FILESIZE; 1071 sb->s_time_gran = 1; 1072 sb->s_export_op = &fuse_export_operations; 1073 1074 file = fget(d.fd); 1075 err = -EINVAL; 1076 if (!file) 1077 goto err; 1078 1079 if ((file->f_op != &fuse_dev_operations) || 1080 (file->f_cred->user_ns != &init_user_ns)) 1081 goto err_fput; 1082 1083 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1084 err = -ENOMEM; 1085 if (!fc) 1086 goto err_fput; 1087 1088 fuse_conn_init(fc); 1089 fc->release = fuse_free_conn; 1090 1091 fud = fuse_dev_alloc(fc); 1092 if (!fud) 1093 goto err_put_conn; 1094 1095 fc->dev = sb->s_dev; 1096 fc->sb = sb; 1097 err = fuse_bdi_init(fc, sb); 1098 if (err) 1099 goto err_dev_free; 1100 1101 /* Handle umasking inside the fuse code */ 1102 if (sb->s_flags & MS_POSIXACL) 1103 fc->dont_mask = 1; 1104 sb->s_flags |= MS_POSIXACL; 1105 1106 fc->default_permissions = d.default_permissions; 1107 fc->allow_other = d.allow_other; 1108 fc->user_id = d.user_id; 1109 fc->group_id = d.group_id; 1110 fc->max_read = max_t(unsigned, 4096, d.max_read); 1111 1112 /* Used by get_root_inode() */ 1113 sb->s_fs_info = fc; 1114 1115 err = -ENOMEM; 1116 root = fuse_get_root_inode(sb, d.rootmode); 1117 sb->s_d_op = &fuse_root_dentry_operations; 1118 root_dentry = d_make_root(root); 1119 if (!root_dentry) 1120 goto err_dev_free; 1121 /* Root dentry doesn't have .d_revalidate */ 1122 sb->s_d_op = &fuse_dentry_operations; 1123 1124 init_req = fuse_request_alloc(0); 1125 if (!init_req) 1126 goto err_put_root; 1127 __set_bit(FR_BACKGROUND, &init_req->flags); 1128 1129 if (is_bdev) { 1130 fc->destroy_req = fuse_request_alloc(0); 1131 if (!fc->destroy_req) 1132 goto err_free_init_req; 1133 } 1134 1135 mutex_lock(&fuse_mutex); 1136 err = -EINVAL; 1137 if (file->private_data) 1138 goto err_unlock; 1139 1140 err = fuse_ctl_add_conn(fc); 1141 if (err) 1142 goto err_unlock; 1143 1144 list_add_tail(&fc->entry, &fuse_conn_list); 1145 sb->s_root = root_dentry; 1146 file->private_data = fud; 1147 mutex_unlock(&fuse_mutex); 1148 /* 1149 * atomic_dec_and_test() in fput() provides the necessary 1150 * memory barrier for file->private_data to be visible on all 1151 * CPUs after this 1152 */ 1153 fput(file); 1154 1155 fuse_send_init(fc, init_req); 1156 1157 return 0; 1158 1159 err_unlock: 1160 mutex_unlock(&fuse_mutex); 1161 err_free_init_req: 1162 fuse_request_free(init_req); 1163 err_put_root: 1164 dput(root_dentry); 1165 err_dev_free: 1166 fuse_dev_free(fud); 1167 err_put_conn: 1168 fuse_conn_put(fc); 1169 err_fput: 1170 fput(file); 1171 err: 1172 return err; 1173 } 1174 1175 static struct dentry *fuse_mount(struct file_system_type *fs_type, 1176 int flags, const char *dev_name, 1177 void *raw_data) 1178 { 1179 return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); 1180 } 1181 1182 static void fuse_kill_sb_anon(struct super_block *sb) 1183 { 1184 struct fuse_conn *fc = get_fuse_conn_super(sb); 1185 1186 if (fc) { 1187 down_write(&fc->killsb); 1188 fc->sb = NULL; 1189 up_write(&fc->killsb); 1190 } 1191 1192 kill_anon_super(sb); 1193 } 1194 1195 static struct file_system_type fuse_fs_type = { 1196 .owner = THIS_MODULE, 1197 .name = "fuse", 1198 .fs_flags = FS_HAS_SUBTYPE, 1199 .mount = fuse_mount, 1200 .kill_sb = fuse_kill_sb_anon, 1201 }; 1202 MODULE_ALIAS_FS("fuse"); 1203 1204 #ifdef CONFIG_BLOCK 1205 static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, 1206 int flags, const char *dev_name, 1207 void *raw_data) 1208 { 1209 return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); 1210 } 1211 1212 static void fuse_kill_sb_blk(struct super_block *sb) 1213 { 1214 struct fuse_conn *fc = get_fuse_conn_super(sb); 1215 1216 if (fc) { 1217 down_write(&fc->killsb); 1218 fc->sb = NULL; 1219 up_write(&fc->killsb); 1220 } 1221 1222 kill_block_super(sb); 1223 } 1224 1225 static struct file_system_type fuseblk_fs_type = { 1226 .owner = THIS_MODULE, 1227 .name = "fuseblk", 1228 .mount = fuse_mount_blk, 1229 .kill_sb = fuse_kill_sb_blk, 1230 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1231 }; 1232 MODULE_ALIAS_FS("fuseblk"); 1233 1234 static inline int register_fuseblk(void) 1235 { 1236 return register_filesystem(&fuseblk_fs_type); 1237 } 1238 1239 static inline void unregister_fuseblk(void) 1240 { 1241 unregister_filesystem(&fuseblk_fs_type); 1242 } 1243 #else 1244 static inline int register_fuseblk(void) 1245 { 1246 return 0; 1247 } 1248 1249 static inline void unregister_fuseblk(void) 1250 { 1251 } 1252 #endif 1253 1254 static void fuse_inode_init_once(void *foo) 1255 { 1256 struct inode *inode = foo; 1257 1258 inode_init_once(inode); 1259 } 1260 1261 static int __init fuse_fs_init(void) 1262 { 1263 int err; 1264 1265 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1266 sizeof(struct fuse_inode), 0, 1267 SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, 1268 fuse_inode_init_once); 1269 err = -ENOMEM; 1270 if (!fuse_inode_cachep) 1271 goto out; 1272 1273 err = register_fuseblk(); 1274 if (err) 1275 goto out2; 1276 1277 err = register_filesystem(&fuse_fs_type); 1278 if (err) 1279 goto out3; 1280 1281 return 0; 1282 1283 out3: 1284 unregister_fuseblk(); 1285 out2: 1286 kmem_cache_destroy(fuse_inode_cachep); 1287 out: 1288 return err; 1289 } 1290 1291 static void fuse_fs_cleanup(void) 1292 { 1293 unregister_filesystem(&fuse_fs_type); 1294 unregister_fuseblk(); 1295 1296 /* 1297 * Make sure all delayed rcu free inodes are flushed before we 1298 * destroy cache. 1299 */ 1300 rcu_barrier(); 1301 kmem_cache_destroy(fuse_inode_cachep); 1302 } 1303 1304 static struct kobject *fuse_kobj; 1305 1306 static int fuse_sysfs_init(void) 1307 { 1308 int err; 1309 1310 fuse_kobj = kobject_create_and_add("fuse", fs_kobj); 1311 if (!fuse_kobj) { 1312 err = -ENOMEM; 1313 goto out_err; 1314 } 1315 1316 err = sysfs_create_mount_point(fuse_kobj, "connections"); 1317 if (err) 1318 goto out_fuse_unregister; 1319 1320 return 0; 1321 1322 out_fuse_unregister: 1323 kobject_put(fuse_kobj); 1324 out_err: 1325 return err; 1326 } 1327 1328 static void fuse_sysfs_cleanup(void) 1329 { 1330 sysfs_remove_mount_point(fuse_kobj, "connections"); 1331 kobject_put(fuse_kobj); 1332 } 1333 1334 static int __init fuse_init(void) 1335 { 1336 int res; 1337 1338 printk(KERN_INFO "fuse init (API version %i.%i)\n", 1339 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 1340 1341 INIT_LIST_HEAD(&fuse_conn_list); 1342 res = fuse_fs_init(); 1343 if (res) 1344 goto err; 1345 1346 res = fuse_dev_init(); 1347 if (res) 1348 goto err_fs_cleanup; 1349 1350 res = fuse_sysfs_init(); 1351 if (res) 1352 goto err_dev_cleanup; 1353 1354 res = fuse_ctl_init(); 1355 if (res) 1356 goto err_sysfs_cleanup; 1357 1358 sanitize_global_limit(&max_user_bgreq); 1359 sanitize_global_limit(&max_user_congthresh); 1360 1361 return 0; 1362 1363 err_sysfs_cleanup: 1364 fuse_sysfs_cleanup(); 1365 err_dev_cleanup: 1366 fuse_dev_cleanup(); 1367 err_fs_cleanup: 1368 fuse_fs_cleanup(); 1369 err: 1370 return res; 1371 } 1372 1373 static void __exit fuse_exit(void) 1374 { 1375 printk(KERN_DEBUG "fuse exit\n"); 1376 1377 fuse_ctl_cleanup(); 1378 fuse_sysfs_cleanup(); 1379 fuse_fs_cleanup(); 1380 fuse_dev_cleanup(); 1381 } 1382 1383 module_init(fuse_init); 1384 module_exit(fuse_exit); 1385