1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/slab.h> 13 #include <linux/file.h> 14 #include <linux/seq_file.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/moduleparam.h> 18 #include <linux/fs_context.h> 19 #include <linux/fs_parser.h> 20 #include <linux/statfs.h> 21 #include <linux/random.h> 22 #include <linux/sched.h> 23 #include <linux/exportfs.h> 24 #include <linux/posix_acl.h> 25 #include <linux/pid_namespace.h> 26 27 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 28 MODULE_DESCRIPTION("Filesystem in Userspace"); 29 MODULE_LICENSE("GPL"); 30 31 static struct kmem_cache *fuse_inode_cachep; 32 struct list_head fuse_conn_list; 33 DEFINE_MUTEX(fuse_mutex); 34 35 static int set_global_limit(const char *val, const struct kernel_param *kp); 36 37 unsigned max_user_bgreq; 38 module_param_call(max_user_bgreq, set_global_limit, param_get_uint, 39 &max_user_bgreq, 0644); 40 __MODULE_PARM_TYPE(max_user_bgreq, "uint"); 41 MODULE_PARM_DESC(max_user_bgreq, 42 "Global limit for the maximum number of backgrounded requests an " 43 "unprivileged user can set"); 44 45 unsigned max_user_congthresh; 46 module_param_call(max_user_congthresh, set_global_limit, param_get_uint, 47 &max_user_congthresh, 0644); 48 __MODULE_PARM_TYPE(max_user_congthresh, "uint"); 49 MODULE_PARM_DESC(max_user_congthresh, 50 "Global limit for the maximum congestion threshold an " 51 "unprivileged user can set"); 52 53 #define FUSE_SUPER_MAGIC 0x65735546 54 55 #define FUSE_DEFAULT_BLKSIZE 512 56 57 /** Maximum number of outstanding background requests */ 58 #define FUSE_DEFAULT_MAX_BACKGROUND 12 59 60 /** Congestion starts at 75% of maximum */ 61 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) 62 63 #ifdef CONFIG_BLOCK 64 static struct file_system_type fuseblk_fs_type; 65 #endif 66 67 struct fuse_forget_link *fuse_alloc_forget(void) 68 { 69 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); 70 } 71 72 static struct inode *fuse_alloc_inode(struct super_block *sb) 73 { 74 struct fuse_inode *fi; 75 76 fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); 77 if (!fi) 78 return NULL; 79 80 fi->i_time = 0; 81 fi->inval_mask = 0; 82 fi->nodeid = 0; 83 fi->nlookup = 0; 84 fi->attr_version = 0; 85 fi->orig_ino = 0; 86 fi->state = 0; 87 mutex_init(&fi->mutex); 88 init_rwsem(&fi->i_mmap_sem); 89 spin_lock_init(&fi->lock); 90 fi->forget = fuse_alloc_forget(); 91 if (!fi->forget) 92 goto out_free; 93 94 if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 95 goto out_free_forget; 96 97 return &fi->inode; 98 99 out_free_forget: 100 kfree(fi->forget); 101 out_free: 102 kmem_cache_free(fuse_inode_cachep, fi); 103 return NULL; 104 } 105 106 static void fuse_free_inode(struct inode *inode) 107 { 108 struct fuse_inode *fi = get_fuse_inode(inode); 109 110 mutex_destroy(&fi->mutex); 111 kfree(fi->forget); 112 #ifdef CONFIG_FUSE_DAX 113 kfree(fi->dax); 114 #endif 115 kmem_cache_free(fuse_inode_cachep, fi); 116 } 117 118 static void fuse_evict_inode(struct inode *inode) 119 { 120 struct fuse_inode *fi = get_fuse_inode(inode); 121 122 truncate_inode_pages_final(&inode->i_data); 123 clear_inode(inode); 124 if (inode->i_sb->s_flags & SB_ACTIVE) { 125 struct fuse_conn *fc = get_fuse_conn(inode); 126 127 if (FUSE_IS_DAX(inode)) 128 fuse_dax_inode_cleanup(inode); 129 if (fi->nlookup) { 130 fuse_queue_forget(fc, fi->forget, fi->nodeid, 131 fi->nlookup); 132 fi->forget = NULL; 133 } 134 } 135 if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { 136 WARN_ON(!list_empty(&fi->write_files)); 137 WARN_ON(!list_empty(&fi->queued_writes)); 138 } 139 } 140 141 static int fuse_reconfigure(struct fs_context *fc) 142 { 143 struct super_block *sb = fc->root->d_sb; 144 145 sync_filesystem(sb); 146 if (fc->sb_flags & SB_MANDLOCK) 147 return -EINVAL; 148 149 return 0; 150 } 151 152 /* 153 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down 154 * so that it will fit. 155 */ 156 static ino_t fuse_squash_ino(u64 ino64) 157 { 158 ino_t ino = (ino_t) ino64; 159 if (sizeof(ino_t) < sizeof(u64)) 160 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; 161 return ino; 162 } 163 164 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 165 u64 attr_valid) 166 { 167 struct fuse_conn *fc = get_fuse_conn(inode); 168 struct fuse_inode *fi = get_fuse_inode(inode); 169 170 lockdep_assert_held(&fi->lock); 171 172 fi->attr_version = atomic64_inc_return(&fc->attr_version); 173 fi->i_time = attr_valid; 174 WRITE_ONCE(fi->inval_mask, 0); 175 176 inode->i_ino = fuse_squash_ino(attr->ino); 177 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 178 set_nlink(inode, attr->nlink); 179 inode->i_uid = make_kuid(fc->user_ns, attr->uid); 180 inode->i_gid = make_kgid(fc->user_ns, attr->gid); 181 inode->i_blocks = attr->blocks; 182 inode->i_atime.tv_sec = attr->atime; 183 inode->i_atime.tv_nsec = attr->atimensec; 184 /* mtime from server may be stale due to local buffered write */ 185 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { 186 inode->i_mtime.tv_sec = attr->mtime; 187 inode->i_mtime.tv_nsec = attr->mtimensec; 188 inode->i_ctime.tv_sec = attr->ctime; 189 inode->i_ctime.tv_nsec = attr->ctimensec; 190 } 191 192 if (attr->blksize != 0) 193 inode->i_blkbits = ilog2(attr->blksize); 194 else 195 inode->i_blkbits = inode->i_sb->s_blocksize_bits; 196 197 /* 198 * Don't set the sticky bit in i_mode, unless we want the VFS 199 * to check permissions. This prevents failures due to the 200 * check in may_delete(). 201 */ 202 fi->orig_i_mode = inode->i_mode; 203 if (!fc->default_permissions) 204 inode->i_mode &= ~S_ISVTX; 205 206 fi->orig_ino = attr->ino; 207 } 208 209 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 210 u64 attr_valid, u64 attr_version) 211 { 212 struct fuse_conn *fc = get_fuse_conn(inode); 213 struct fuse_inode *fi = get_fuse_inode(inode); 214 bool is_wb = fc->writeback_cache; 215 loff_t oldsize; 216 struct timespec64 old_mtime; 217 218 spin_lock(&fi->lock); 219 if ((attr_version != 0 && fi->attr_version > attr_version) || 220 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 221 spin_unlock(&fi->lock); 222 return; 223 } 224 225 old_mtime = inode->i_mtime; 226 fuse_change_attributes_common(inode, attr, attr_valid); 227 228 oldsize = inode->i_size; 229 /* 230 * In case of writeback_cache enabled, the cached writes beyond EOF 231 * extend local i_size without keeping userspace server in sync. So, 232 * attr->size coming from server can be stale. We cannot trust it. 233 */ 234 if (!is_wb || !S_ISREG(inode->i_mode)) 235 i_size_write(inode, attr->size); 236 spin_unlock(&fi->lock); 237 238 if (!is_wb && S_ISREG(inode->i_mode)) { 239 bool inval = false; 240 241 if (oldsize != attr->size) { 242 truncate_pagecache(inode, attr->size); 243 if (!fc->explicit_inval_data) 244 inval = true; 245 } else if (fc->auto_inval_data) { 246 struct timespec64 new_mtime = { 247 .tv_sec = attr->mtime, 248 .tv_nsec = attr->mtimensec, 249 }; 250 251 /* 252 * Auto inval mode also checks and invalidates if mtime 253 * has changed. 254 */ 255 if (!timespec64_equal(&old_mtime, &new_mtime)) 256 inval = true; 257 } 258 259 if (inval) 260 invalidate_inode_pages2(inode->i_mapping); 261 } 262 } 263 264 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) 265 { 266 inode->i_mode = attr->mode & S_IFMT; 267 inode->i_size = attr->size; 268 inode->i_mtime.tv_sec = attr->mtime; 269 inode->i_mtime.tv_nsec = attr->mtimensec; 270 inode->i_ctime.tv_sec = attr->ctime; 271 inode->i_ctime.tv_nsec = attr->ctimensec; 272 if (S_ISREG(inode->i_mode)) { 273 fuse_init_common(inode); 274 fuse_init_file_inode(inode); 275 } else if (S_ISDIR(inode->i_mode)) 276 fuse_init_dir(inode); 277 else if (S_ISLNK(inode->i_mode)) 278 fuse_init_symlink(inode); 279 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 280 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 281 fuse_init_common(inode); 282 init_special_inode(inode, inode->i_mode, 283 new_decode_dev(attr->rdev)); 284 } else 285 BUG(); 286 } 287 288 static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 289 { 290 u64 nodeid = *(u64 *) _nodeidp; 291 if (get_node_id(inode) == nodeid) 292 return 1; 293 else 294 return 0; 295 } 296 297 static int fuse_inode_set(struct inode *inode, void *_nodeidp) 298 { 299 u64 nodeid = *(u64 *) _nodeidp; 300 get_fuse_inode(inode)->nodeid = nodeid; 301 return 0; 302 } 303 304 struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 305 int generation, struct fuse_attr *attr, 306 u64 attr_valid, u64 attr_version) 307 { 308 struct inode *inode; 309 struct fuse_inode *fi; 310 struct fuse_conn *fc = get_fuse_conn_super(sb); 311 312 /* 313 * Auto mount points get their node id from the submount root, which is 314 * not a unique identifier within this filesystem. 315 * 316 * To avoid conflicts, do not place submount points into the inode hash 317 * table. 318 */ 319 if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && 320 S_ISDIR(attr->mode)) { 321 inode = new_inode(sb); 322 if (!inode) 323 return NULL; 324 325 fuse_init_inode(inode, attr); 326 get_fuse_inode(inode)->nodeid = nodeid; 327 inode->i_flags |= S_AUTOMOUNT; 328 goto done; 329 } 330 331 retry: 332 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 333 if (!inode) 334 return NULL; 335 336 if ((inode->i_state & I_NEW)) { 337 inode->i_flags |= S_NOATIME; 338 if (!fc->writeback_cache || !S_ISREG(attr->mode)) 339 inode->i_flags |= S_NOCMTIME; 340 inode->i_generation = generation; 341 fuse_init_inode(inode, attr); 342 unlock_new_inode(inode); 343 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { 344 /* Inode has changed type, any I/O on the old should fail */ 345 make_bad_inode(inode); 346 iput(inode); 347 goto retry; 348 } 349 done: 350 fi = get_fuse_inode(inode); 351 spin_lock(&fi->lock); 352 fi->nlookup++; 353 spin_unlock(&fi->lock); 354 fuse_change_attributes(inode, attr, attr_valid, attr_version); 355 356 return inode; 357 } 358 359 struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 360 struct fuse_mount **fm) 361 { 362 struct fuse_mount *fm_iter; 363 struct inode *inode; 364 365 WARN_ON(!rwsem_is_locked(&fc->killsb)); 366 list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { 367 if (!fm_iter->sb) 368 continue; 369 370 inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); 371 if (inode) { 372 if (fm) 373 *fm = fm_iter; 374 return inode; 375 } 376 } 377 378 return NULL; 379 } 380 381 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 382 loff_t offset, loff_t len) 383 { 384 struct fuse_inode *fi; 385 struct inode *inode; 386 pgoff_t pg_start; 387 pgoff_t pg_end; 388 389 inode = fuse_ilookup(fc, nodeid, NULL); 390 if (!inode) 391 return -ENOENT; 392 393 fi = get_fuse_inode(inode); 394 spin_lock(&fi->lock); 395 fi->attr_version = atomic64_inc_return(&fc->attr_version); 396 spin_unlock(&fi->lock); 397 398 fuse_invalidate_attr(inode); 399 forget_all_cached_acls(inode); 400 if (offset >= 0) { 401 pg_start = offset >> PAGE_SHIFT; 402 if (len <= 0) 403 pg_end = -1; 404 else 405 pg_end = (offset + len - 1) >> PAGE_SHIFT; 406 invalidate_inode_pages2_range(inode->i_mapping, 407 pg_start, pg_end); 408 } 409 iput(inode); 410 return 0; 411 } 412 413 bool fuse_lock_inode(struct inode *inode) 414 { 415 bool locked = false; 416 417 if (!get_fuse_conn(inode)->parallel_dirops) { 418 mutex_lock(&get_fuse_inode(inode)->mutex); 419 locked = true; 420 } 421 422 return locked; 423 } 424 425 void fuse_unlock_inode(struct inode *inode, bool locked) 426 { 427 if (locked) 428 mutex_unlock(&get_fuse_inode(inode)->mutex); 429 } 430 431 static void fuse_umount_begin(struct super_block *sb) 432 { 433 struct fuse_conn *fc = get_fuse_conn_super(sb); 434 435 if (!fc->no_force_umount) 436 fuse_abort_conn(fc); 437 } 438 439 static void fuse_send_destroy(struct fuse_mount *fm) 440 { 441 if (fm->fc->conn_init) { 442 FUSE_ARGS(args); 443 444 args.opcode = FUSE_DESTROY; 445 args.force = true; 446 args.nocreds = true; 447 fuse_simple_request(fm, &args); 448 } 449 } 450 451 static void fuse_put_super(struct super_block *sb) 452 { 453 struct fuse_mount *fm = get_fuse_mount_super(sb); 454 455 fuse_mount_put(fm); 456 } 457 458 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 459 { 460 stbuf->f_type = FUSE_SUPER_MAGIC; 461 stbuf->f_bsize = attr->bsize; 462 stbuf->f_frsize = attr->frsize; 463 stbuf->f_blocks = attr->blocks; 464 stbuf->f_bfree = attr->bfree; 465 stbuf->f_bavail = attr->bavail; 466 stbuf->f_files = attr->files; 467 stbuf->f_ffree = attr->ffree; 468 stbuf->f_namelen = attr->namelen; 469 /* fsid is left zero */ 470 } 471 472 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 473 { 474 struct super_block *sb = dentry->d_sb; 475 struct fuse_mount *fm = get_fuse_mount_super(sb); 476 FUSE_ARGS(args); 477 struct fuse_statfs_out outarg; 478 int err; 479 480 if (!fuse_allow_current_process(fm->fc)) { 481 buf->f_type = FUSE_SUPER_MAGIC; 482 return 0; 483 } 484 485 memset(&outarg, 0, sizeof(outarg)); 486 args.in_numargs = 0; 487 args.opcode = FUSE_STATFS; 488 args.nodeid = get_node_id(d_inode(dentry)); 489 args.out_numargs = 1; 490 args.out_args[0].size = sizeof(outarg); 491 args.out_args[0].value = &outarg; 492 err = fuse_simple_request(fm, &args); 493 if (!err) 494 convert_fuse_statfs(buf, &outarg.st); 495 return err; 496 } 497 498 enum { 499 OPT_SOURCE, 500 OPT_SUBTYPE, 501 OPT_FD, 502 OPT_ROOTMODE, 503 OPT_USER_ID, 504 OPT_GROUP_ID, 505 OPT_DEFAULT_PERMISSIONS, 506 OPT_ALLOW_OTHER, 507 OPT_MAX_READ, 508 OPT_BLKSIZE, 509 OPT_ERR 510 }; 511 512 static const struct fs_parameter_spec fuse_fs_parameters[] = { 513 fsparam_string ("source", OPT_SOURCE), 514 fsparam_u32 ("fd", OPT_FD), 515 fsparam_u32oct ("rootmode", OPT_ROOTMODE), 516 fsparam_u32 ("user_id", OPT_USER_ID), 517 fsparam_u32 ("group_id", OPT_GROUP_ID), 518 fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), 519 fsparam_flag ("allow_other", OPT_ALLOW_OTHER), 520 fsparam_u32 ("max_read", OPT_MAX_READ), 521 fsparam_u32 ("blksize", OPT_BLKSIZE), 522 fsparam_string ("subtype", OPT_SUBTYPE), 523 {} 524 }; 525 526 static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) 527 { 528 struct fs_parse_result result; 529 struct fuse_fs_context *ctx = fc->fs_private; 530 int opt; 531 532 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { 533 /* 534 * Ignore options coming from mount(MS_REMOUNT) for backward 535 * compatibility. 536 */ 537 if (fc->oldapi) 538 return 0; 539 540 return invalfc(fc, "No changes allowed in reconfigure"); 541 } 542 543 opt = fs_parse(fc, fuse_fs_parameters, param, &result); 544 if (opt < 0) 545 return opt; 546 547 switch (opt) { 548 case OPT_SOURCE: 549 if (fc->source) 550 return invalfc(fc, "Multiple sources specified"); 551 fc->source = param->string; 552 param->string = NULL; 553 break; 554 555 case OPT_SUBTYPE: 556 if (ctx->subtype) 557 return invalfc(fc, "Multiple subtypes specified"); 558 ctx->subtype = param->string; 559 param->string = NULL; 560 return 0; 561 562 case OPT_FD: 563 ctx->fd = result.uint_32; 564 ctx->fd_present = true; 565 break; 566 567 case OPT_ROOTMODE: 568 if (!fuse_valid_type(result.uint_32)) 569 return invalfc(fc, "Invalid rootmode"); 570 ctx->rootmode = result.uint_32; 571 ctx->rootmode_present = true; 572 break; 573 574 case OPT_USER_ID: 575 ctx->user_id = make_kuid(fc->user_ns, result.uint_32); 576 if (!uid_valid(ctx->user_id)) 577 return invalfc(fc, "Invalid user_id"); 578 ctx->user_id_present = true; 579 break; 580 581 case OPT_GROUP_ID: 582 ctx->group_id = make_kgid(fc->user_ns, result.uint_32); 583 if (!gid_valid(ctx->group_id)) 584 return invalfc(fc, "Invalid group_id"); 585 ctx->group_id_present = true; 586 break; 587 588 case OPT_DEFAULT_PERMISSIONS: 589 ctx->default_permissions = true; 590 break; 591 592 case OPT_ALLOW_OTHER: 593 ctx->allow_other = true; 594 break; 595 596 case OPT_MAX_READ: 597 ctx->max_read = result.uint_32; 598 break; 599 600 case OPT_BLKSIZE: 601 if (!ctx->is_bdev) 602 return invalfc(fc, "blksize only supported for fuseblk"); 603 ctx->blksize = result.uint_32; 604 break; 605 606 default: 607 return -EINVAL; 608 } 609 610 return 0; 611 } 612 613 static void fuse_free_fc(struct fs_context *fc) 614 { 615 struct fuse_fs_context *ctx = fc->fs_private; 616 617 if (ctx) { 618 kfree(ctx->subtype); 619 kfree(ctx); 620 } 621 } 622 623 static int fuse_show_options(struct seq_file *m, struct dentry *root) 624 { 625 struct super_block *sb = root->d_sb; 626 struct fuse_conn *fc = get_fuse_conn_super(sb); 627 628 if (fc->legacy_opts_show) { 629 seq_printf(m, ",user_id=%u", 630 from_kuid_munged(fc->user_ns, fc->user_id)); 631 seq_printf(m, ",group_id=%u", 632 from_kgid_munged(fc->user_ns, fc->group_id)); 633 if (fc->default_permissions) 634 seq_puts(m, ",default_permissions"); 635 if (fc->allow_other) 636 seq_puts(m, ",allow_other"); 637 if (fc->max_read != ~0) 638 seq_printf(m, ",max_read=%u", fc->max_read); 639 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 640 seq_printf(m, ",blksize=%lu", sb->s_blocksize); 641 } 642 #ifdef CONFIG_FUSE_DAX 643 if (fc->dax) 644 seq_puts(m, ",dax"); 645 #endif 646 647 return 0; 648 } 649 650 static void fuse_iqueue_init(struct fuse_iqueue *fiq, 651 const struct fuse_iqueue_ops *ops, 652 void *priv) 653 { 654 memset(fiq, 0, sizeof(struct fuse_iqueue)); 655 spin_lock_init(&fiq->lock); 656 init_waitqueue_head(&fiq->waitq); 657 INIT_LIST_HEAD(&fiq->pending); 658 INIT_LIST_HEAD(&fiq->interrupts); 659 fiq->forget_list_tail = &fiq->forget_list_head; 660 fiq->connected = 1; 661 fiq->ops = ops; 662 fiq->priv = priv; 663 } 664 665 static void fuse_pqueue_init(struct fuse_pqueue *fpq) 666 { 667 unsigned int i; 668 669 spin_lock_init(&fpq->lock); 670 for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) 671 INIT_LIST_HEAD(&fpq->processing[i]); 672 INIT_LIST_HEAD(&fpq->io); 673 fpq->connected = 1; 674 } 675 676 void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 677 struct user_namespace *user_ns, 678 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 679 { 680 memset(fc, 0, sizeof(*fc)); 681 spin_lock_init(&fc->lock); 682 spin_lock_init(&fc->bg_lock); 683 init_rwsem(&fc->killsb); 684 refcount_set(&fc->count, 1); 685 atomic_set(&fc->dev_count, 1); 686 init_waitqueue_head(&fc->blocked_waitq); 687 fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); 688 INIT_LIST_HEAD(&fc->bg_queue); 689 INIT_LIST_HEAD(&fc->entry); 690 INIT_LIST_HEAD(&fc->devices); 691 atomic_set(&fc->num_waiting, 0); 692 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 693 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 694 atomic64_set(&fc->khctr, 0); 695 fc->polled_files = RB_ROOT; 696 fc->blocked = 0; 697 fc->initialized = 0; 698 fc->connected = 1; 699 atomic64_set(&fc->attr_version, 1); 700 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 701 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 702 fc->user_ns = get_user_ns(user_ns); 703 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 704 705 INIT_LIST_HEAD(&fc->mounts); 706 list_add(&fm->fc_entry, &fc->mounts); 707 fm->fc = fc; 708 refcount_set(&fm->count, 1); 709 } 710 EXPORT_SYMBOL_GPL(fuse_conn_init); 711 712 void fuse_conn_put(struct fuse_conn *fc) 713 { 714 if (refcount_dec_and_test(&fc->count)) { 715 struct fuse_iqueue *fiq = &fc->iq; 716 717 if (IS_ENABLED(CONFIG_FUSE_DAX)) 718 fuse_dax_conn_free(fc); 719 if (fiq->ops->release) 720 fiq->ops->release(fiq); 721 put_pid_ns(fc->pid_ns); 722 put_user_ns(fc->user_ns); 723 fc->release(fc); 724 } 725 } 726 EXPORT_SYMBOL_GPL(fuse_conn_put); 727 728 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 729 { 730 refcount_inc(&fc->count); 731 return fc; 732 } 733 EXPORT_SYMBOL_GPL(fuse_conn_get); 734 735 void fuse_mount_put(struct fuse_mount *fm) 736 { 737 if (refcount_dec_and_test(&fm->count)) { 738 if (fm->fc) 739 fuse_conn_put(fm->fc); 740 kfree(fm); 741 } 742 } 743 EXPORT_SYMBOL_GPL(fuse_mount_put); 744 745 struct fuse_mount *fuse_mount_get(struct fuse_mount *fm) 746 { 747 refcount_inc(&fm->count); 748 return fm; 749 } 750 EXPORT_SYMBOL_GPL(fuse_mount_get); 751 752 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 753 { 754 struct fuse_attr attr; 755 memset(&attr, 0, sizeof(attr)); 756 757 attr.mode = mode; 758 attr.ino = FUSE_ROOT_ID; 759 attr.nlink = 1; 760 return fuse_iget(sb, 1, 0, &attr, 0, 0); 761 } 762 763 struct fuse_inode_handle { 764 u64 nodeid; 765 u32 generation; 766 }; 767 768 static struct dentry *fuse_get_dentry(struct super_block *sb, 769 struct fuse_inode_handle *handle) 770 { 771 struct fuse_conn *fc = get_fuse_conn_super(sb); 772 struct inode *inode; 773 struct dentry *entry; 774 int err = -ESTALE; 775 776 if (handle->nodeid == 0) 777 goto out_err; 778 779 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); 780 if (!inode) { 781 struct fuse_entry_out outarg; 782 const struct qstr name = QSTR_INIT(".", 1); 783 784 if (!fc->export_support) 785 goto out_err; 786 787 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, 788 &inode); 789 if (err && err != -ENOENT) 790 goto out_err; 791 if (err || !inode) { 792 err = -ESTALE; 793 goto out_err; 794 } 795 err = -EIO; 796 if (get_node_id(inode) != handle->nodeid) 797 goto out_iput; 798 } 799 err = -ESTALE; 800 if (inode->i_generation != handle->generation) 801 goto out_iput; 802 803 entry = d_obtain_alias(inode); 804 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) 805 fuse_invalidate_entry_cache(entry); 806 807 return entry; 808 809 out_iput: 810 iput(inode); 811 out_err: 812 return ERR_PTR(err); 813 } 814 815 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, 816 struct inode *parent) 817 { 818 int len = parent ? 6 : 3; 819 u64 nodeid; 820 u32 generation; 821 822 if (*max_len < len) { 823 *max_len = len; 824 return FILEID_INVALID; 825 } 826 827 nodeid = get_fuse_inode(inode)->nodeid; 828 generation = inode->i_generation; 829 830 fh[0] = (u32)(nodeid >> 32); 831 fh[1] = (u32)(nodeid & 0xffffffff); 832 fh[2] = generation; 833 834 if (parent) { 835 nodeid = get_fuse_inode(parent)->nodeid; 836 generation = parent->i_generation; 837 838 fh[3] = (u32)(nodeid >> 32); 839 fh[4] = (u32)(nodeid & 0xffffffff); 840 fh[5] = generation; 841 } 842 843 *max_len = len; 844 return parent ? 0x82 : 0x81; 845 } 846 847 static struct dentry *fuse_fh_to_dentry(struct super_block *sb, 848 struct fid *fid, int fh_len, int fh_type) 849 { 850 struct fuse_inode_handle handle; 851 852 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) 853 return NULL; 854 855 handle.nodeid = (u64) fid->raw[0] << 32; 856 handle.nodeid |= (u64) fid->raw[1]; 857 handle.generation = fid->raw[2]; 858 return fuse_get_dentry(sb, &handle); 859 } 860 861 static struct dentry *fuse_fh_to_parent(struct super_block *sb, 862 struct fid *fid, int fh_len, int fh_type) 863 { 864 struct fuse_inode_handle parent; 865 866 if (fh_type != 0x82 || fh_len < 6) 867 return NULL; 868 869 parent.nodeid = (u64) fid->raw[3] << 32; 870 parent.nodeid |= (u64) fid->raw[4]; 871 parent.generation = fid->raw[5]; 872 return fuse_get_dentry(sb, &parent); 873 } 874 875 static struct dentry *fuse_get_parent(struct dentry *child) 876 { 877 struct inode *child_inode = d_inode(child); 878 struct fuse_conn *fc = get_fuse_conn(child_inode); 879 struct inode *inode; 880 struct dentry *parent; 881 struct fuse_entry_out outarg; 882 const struct qstr name = QSTR_INIT("..", 2); 883 int err; 884 885 if (!fc->export_support) 886 return ERR_PTR(-ESTALE); 887 888 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 889 &name, &outarg, &inode); 890 if (err) { 891 if (err == -ENOENT) 892 return ERR_PTR(-ESTALE); 893 return ERR_PTR(err); 894 } 895 896 parent = d_obtain_alias(inode); 897 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) 898 fuse_invalidate_entry_cache(parent); 899 900 return parent; 901 } 902 903 static const struct export_operations fuse_export_operations = { 904 .fh_to_dentry = fuse_fh_to_dentry, 905 .fh_to_parent = fuse_fh_to_parent, 906 .encode_fh = fuse_encode_fh, 907 .get_parent = fuse_get_parent, 908 }; 909 910 static const struct super_operations fuse_super_operations = { 911 .alloc_inode = fuse_alloc_inode, 912 .free_inode = fuse_free_inode, 913 .evict_inode = fuse_evict_inode, 914 .write_inode = fuse_write_inode, 915 .drop_inode = generic_delete_inode, 916 .put_super = fuse_put_super, 917 .umount_begin = fuse_umount_begin, 918 .statfs = fuse_statfs, 919 .show_options = fuse_show_options, 920 }; 921 922 static void sanitize_global_limit(unsigned *limit) 923 { 924 /* 925 * The default maximum number of async requests is calculated to consume 926 * 1/2^13 of the total memory, assuming 392 bytes per request. 927 */ 928 if (*limit == 0) 929 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; 930 931 if (*limit >= 1 << 16) 932 *limit = (1 << 16) - 1; 933 } 934 935 static int set_global_limit(const char *val, const struct kernel_param *kp) 936 { 937 int rv; 938 939 rv = param_set_uint(val, kp); 940 if (rv) 941 return rv; 942 943 sanitize_global_limit((unsigned *)kp->arg); 944 945 return 0; 946 } 947 948 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) 949 { 950 int cap_sys_admin = capable(CAP_SYS_ADMIN); 951 952 if (arg->minor < 13) 953 return; 954 955 sanitize_global_limit(&max_user_bgreq); 956 sanitize_global_limit(&max_user_congthresh); 957 958 spin_lock(&fc->bg_lock); 959 if (arg->max_background) { 960 fc->max_background = arg->max_background; 961 962 if (!cap_sys_admin && fc->max_background > max_user_bgreq) 963 fc->max_background = max_user_bgreq; 964 } 965 if (arg->congestion_threshold) { 966 fc->congestion_threshold = arg->congestion_threshold; 967 968 if (!cap_sys_admin && 969 fc->congestion_threshold > max_user_congthresh) 970 fc->congestion_threshold = max_user_congthresh; 971 } 972 spin_unlock(&fc->bg_lock); 973 } 974 975 struct fuse_init_args { 976 struct fuse_args args; 977 struct fuse_init_in in; 978 struct fuse_init_out out; 979 }; 980 981 static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, 982 int error) 983 { 984 struct fuse_conn *fc = fm->fc; 985 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 986 struct fuse_init_out *arg = &ia->out; 987 bool ok = true; 988 989 if (error || arg->major != FUSE_KERNEL_VERSION) 990 ok = false; 991 else { 992 unsigned long ra_pages; 993 994 process_init_limits(fc, arg); 995 996 if (arg->minor >= 6) { 997 ra_pages = arg->max_readahead / PAGE_SIZE; 998 if (arg->flags & FUSE_ASYNC_READ) 999 fc->async_read = 1; 1000 if (!(arg->flags & FUSE_POSIX_LOCKS)) 1001 fc->no_lock = 1; 1002 if (arg->minor >= 17) { 1003 if (!(arg->flags & FUSE_FLOCK_LOCKS)) 1004 fc->no_flock = 1; 1005 } else { 1006 if (!(arg->flags & FUSE_POSIX_LOCKS)) 1007 fc->no_flock = 1; 1008 } 1009 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 1010 fc->atomic_o_trunc = 1; 1011 if (arg->minor >= 9) { 1012 /* LOOKUP has dependency on proto version */ 1013 if (arg->flags & FUSE_EXPORT_SUPPORT) 1014 fc->export_support = 1; 1015 } 1016 if (arg->flags & FUSE_BIG_WRITES) 1017 fc->big_writes = 1; 1018 if (arg->flags & FUSE_DONT_MASK) 1019 fc->dont_mask = 1; 1020 if (arg->flags & FUSE_AUTO_INVAL_DATA) 1021 fc->auto_inval_data = 1; 1022 else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA) 1023 fc->explicit_inval_data = 1; 1024 if (arg->flags & FUSE_DO_READDIRPLUS) { 1025 fc->do_readdirplus = 1; 1026 if (arg->flags & FUSE_READDIRPLUS_AUTO) 1027 fc->readdirplus_auto = 1; 1028 } 1029 if (arg->flags & FUSE_ASYNC_DIO) 1030 fc->async_dio = 1; 1031 if (arg->flags & FUSE_WRITEBACK_CACHE) 1032 fc->writeback_cache = 1; 1033 if (arg->flags & FUSE_PARALLEL_DIROPS) 1034 fc->parallel_dirops = 1; 1035 if (arg->flags & FUSE_HANDLE_KILLPRIV) 1036 fc->handle_killpriv = 1; 1037 if (arg->time_gran && arg->time_gran <= 1000000000) 1038 fm->sb->s_time_gran = arg->time_gran; 1039 if ((arg->flags & FUSE_POSIX_ACL)) { 1040 fc->default_permissions = 1; 1041 fc->posix_acl = 1; 1042 fm->sb->s_xattr = fuse_acl_xattr_handlers; 1043 } 1044 if (arg->flags & FUSE_CACHE_SYMLINKS) 1045 fc->cache_symlinks = 1; 1046 if (arg->flags & FUSE_ABORT_ERROR) 1047 fc->abort_err = 1; 1048 if (arg->flags & FUSE_MAX_PAGES) { 1049 fc->max_pages = 1050 min_t(unsigned int, FUSE_MAX_MAX_PAGES, 1051 max_t(unsigned int, arg->max_pages, 1)); 1052 } 1053 if (IS_ENABLED(CONFIG_FUSE_DAX) && 1054 arg->flags & FUSE_MAP_ALIGNMENT && 1055 !fuse_dax_check_alignment(fc, arg->map_alignment)) { 1056 ok = false; 1057 } 1058 } else { 1059 ra_pages = fc->max_read / PAGE_SIZE; 1060 fc->no_lock = 1; 1061 fc->no_flock = 1; 1062 } 1063 1064 fm->sb->s_bdi->ra_pages = 1065 min(fm->sb->s_bdi->ra_pages, ra_pages); 1066 fc->minor = arg->minor; 1067 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 1068 fc->max_write = max_t(unsigned, 4096, fc->max_write); 1069 fc->conn_init = 1; 1070 } 1071 kfree(ia); 1072 1073 if (!ok) { 1074 fc->conn_init = 0; 1075 fc->conn_error = 1; 1076 } 1077 1078 fuse_set_initialized(fc); 1079 wake_up_all(&fc->blocked_waitq); 1080 } 1081 1082 void fuse_send_init(struct fuse_mount *fm) 1083 { 1084 struct fuse_init_args *ia; 1085 1086 ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); 1087 1088 ia->in.major = FUSE_KERNEL_VERSION; 1089 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1090 ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 1091 ia->in.flags |= 1092 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1093 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 1094 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 1095 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 1096 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 1097 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | 1098 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1099 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1100 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; 1101 #ifdef CONFIG_FUSE_DAX 1102 if (fm->fc->dax) 1103 ia->in.flags |= FUSE_MAP_ALIGNMENT; 1104 #endif 1105 if (fm->fc->auto_submounts) 1106 ia->in.flags |= FUSE_SUBMOUNTS; 1107 1108 ia->args.opcode = FUSE_INIT; 1109 ia->args.in_numargs = 1; 1110 ia->args.in_args[0].size = sizeof(ia->in); 1111 ia->args.in_args[0].value = &ia->in; 1112 ia->args.out_numargs = 1; 1113 /* Variable length argument used for backward compatibility 1114 with interface version < 7.5. Rest of init_out is zeroed 1115 by do_get_request(), so a short reply is not a problem */ 1116 ia->args.out_argvar = true; 1117 ia->args.out_args[0].size = sizeof(ia->out); 1118 ia->args.out_args[0].value = &ia->out; 1119 ia->args.force = true; 1120 ia->args.nocreds = true; 1121 ia->args.end = process_init_reply; 1122 1123 if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) 1124 process_init_reply(fm, &ia->args, -ENOTCONN); 1125 } 1126 EXPORT_SYMBOL_GPL(fuse_send_init); 1127 1128 void fuse_free_conn(struct fuse_conn *fc) 1129 { 1130 WARN_ON(!list_empty(&fc->devices)); 1131 kfree_rcu(fc, rcu); 1132 } 1133 EXPORT_SYMBOL_GPL(fuse_free_conn); 1134 1135 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 1136 { 1137 int err; 1138 char *suffix = ""; 1139 1140 if (sb->s_bdev) { 1141 suffix = "-fuseblk"; 1142 /* 1143 * sb->s_bdi points to blkdev's bdi however we want to redirect 1144 * it to our private bdi... 1145 */ 1146 bdi_put(sb->s_bdi); 1147 sb->s_bdi = &noop_backing_dev_info; 1148 } 1149 err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), 1150 MINOR(fc->dev), suffix); 1151 if (err) 1152 return err; 1153 1154 /* fuse does it's own writeback accounting */ 1155 sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; 1156 sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; 1157 1158 /* 1159 * For a single fuse filesystem use max 1% of dirty + 1160 * writeback threshold. 1161 * 1162 * This gives about 1M of write buffer for memory maps on a 1163 * machine with 1G and 10% dirty_ratio, which should be more 1164 * than enough. 1165 * 1166 * Privileged users can raise it by writing to 1167 * 1168 * /sys/class/bdi/<bdi>/max_ratio 1169 */ 1170 bdi_set_max_ratio(sb->s_bdi, 1); 1171 1172 return 0; 1173 } 1174 1175 struct fuse_dev *fuse_dev_alloc(void) 1176 { 1177 struct fuse_dev *fud; 1178 struct list_head *pq; 1179 1180 fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); 1181 if (!fud) 1182 return NULL; 1183 1184 pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); 1185 if (!pq) { 1186 kfree(fud); 1187 return NULL; 1188 } 1189 1190 fud->pq.processing = pq; 1191 fuse_pqueue_init(&fud->pq); 1192 1193 return fud; 1194 } 1195 EXPORT_SYMBOL_GPL(fuse_dev_alloc); 1196 1197 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) 1198 { 1199 fud->fc = fuse_conn_get(fc); 1200 spin_lock(&fc->lock); 1201 list_add_tail(&fud->entry, &fc->devices); 1202 spin_unlock(&fc->lock); 1203 } 1204 EXPORT_SYMBOL_GPL(fuse_dev_install); 1205 1206 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) 1207 { 1208 struct fuse_dev *fud; 1209 1210 fud = fuse_dev_alloc(); 1211 if (!fud) 1212 return NULL; 1213 1214 fuse_dev_install(fud, fc); 1215 return fud; 1216 } 1217 EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); 1218 1219 void fuse_dev_free(struct fuse_dev *fud) 1220 { 1221 struct fuse_conn *fc = fud->fc; 1222 1223 if (fc) { 1224 spin_lock(&fc->lock); 1225 list_del(&fud->entry); 1226 spin_unlock(&fc->lock); 1227 1228 fuse_conn_put(fc); 1229 } 1230 kfree(fud->pq.processing); 1231 kfree(fud); 1232 } 1233 EXPORT_SYMBOL_GPL(fuse_dev_free); 1234 1235 static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1236 const struct fuse_inode *fi) 1237 { 1238 *attr = (struct fuse_attr){ 1239 .ino = fi->inode.i_ino, 1240 .size = fi->inode.i_size, 1241 .blocks = fi->inode.i_blocks, 1242 .atime = fi->inode.i_atime.tv_sec, 1243 .mtime = fi->inode.i_mtime.tv_sec, 1244 .ctime = fi->inode.i_ctime.tv_sec, 1245 .atimensec = fi->inode.i_atime.tv_nsec, 1246 .mtimensec = fi->inode.i_mtime.tv_nsec, 1247 .ctimensec = fi->inode.i_ctime.tv_nsec, 1248 .mode = fi->inode.i_mode, 1249 .nlink = fi->inode.i_nlink, 1250 .uid = fi->inode.i_uid.val, 1251 .gid = fi->inode.i_gid.val, 1252 .rdev = fi->inode.i_rdev, 1253 .blksize = 1u << fi->inode.i_blkbits, 1254 }; 1255 } 1256 1257 static void fuse_sb_defaults(struct super_block *sb) 1258 { 1259 sb->s_magic = FUSE_SUPER_MAGIC; 1260 sb->s_op = &fuse_super_operations; 1261 sb->s_xattr = fuse_xattr_handlers; 1262 sb->s_maxbytes = MAX_LFS_FILESIZE; 1263 sb->s_time_gran = 1; 1264 sb->s_export_op = &fuse_export_operations; 1265 sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1266 if (sb->s_user_ns != &init_user_ns) 1267 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1268 sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1269 1270 /* 1271 * If we are not in the initial user namespace posix 1272 * acls must be translated. 1273 */ 1274 if (sb->s_user_ns != &init_user_ns) 1275 sb->s_xattr = fuse_no_acl_xattr_handlers; 1276 } 1277 1278 int fuse_fill_super_submount(struct super_block *sb, 1279 struct fuse_inode *parent_fi) 1280 { 1281 struct fuse_mount *fm = get_fuse_mount_super(sb); 1282 struct super_block *parent_sb = parent_fi->inode.i_sb; 1283 struct fuse_attr root_attr; 1284 struct inode *root; 1285 1286 fuse_sb_defaults(sb); 1287 fm->sb = sb; 1288 1289 WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1290 sb->s_bdi = bdi_get(parent_sb->s_bdi); 1291 1292 sb->s_xattr = parent_sb->s_xattr; 1293 sb->s_time_gran = parent_sb->s_time_gran; 1294 sb->s_blocksize = parent_sb->s_blocksize; 1295 sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1296 sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1297 if (parent_sb->s_subtype && !sb->s_subtype) 1298 return -ENOMEM; 1299 1300 fuse_fill_attr_from_inode(&root_attr, parent_fi); 1301 root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); 1302 /* 1303 * This inode is just a duplicate, so it is not looked up and 1304 * its nlookup should not be incremented. fuse_iget() does 1305 * that, though, so undo it here. 1306 */ 1307 get_fuse_inode(root)->nlookup--; 1308 sb->s_d_op = &fuse_dentry_operations; 1309 sb->s_root = d_make_root(root); 1310 if (!sb->s_root) 1311 return -ENOMEM; 1312 1313 return 0; 1314 } 1315 1316 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1317 { 1318 struct fuse_dev *fud = NULL; 1319 struct fuse_mount *fm = get_fuse_mount_super(sb); 1320 struct fuse_conn *fc = fm->fc; 1321 struct inode *root; 1322 struct dentry *root_dentry; 1323 int err; 1324 1325 err = -EINVAL; 1326 if (sb->s_flags & SB_MANDLOCK) 1327 goto err; 1328 1329 fuse_sb_defaults(sb); 1330 1331 if (ctx->is_bdev) { 1332 #ifdef CONFIG_BLOCK 1333 err = -EINVAL; 1334 if (!sb_set_blocksize(sb, ctx->blksize)) 1335 goto err; 1336 #endif 1337 } else { 1338 sb->s_blocksize = PAGE_SIZE; 1339 sb->s_blocksize_bits = PAGE_SHIFT; 1340 } 1341 1342 sb->s_subtype = ctx->subtype; 1343 ctx->subtype = NULL; 1344 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1345 err = fuse_dax_conn_alloc(fc, ctx->dax_dev); 1346 if (err) 1347 goto err; 1348 } 1349 1350 if (ctx->fudptr) { 1351 err = -ENOMEM; 1352 fud = fuse_dev_alloc_install(fc); 1353 if (!fud) 1354 goto err_free_dax; 1355 } 1356 1357 fc->dev = sb->s_dev; 1358 fm->sb = sb; 1359 err = fuse_bdi_init(fc, sb); 1360 if (err) 1361 goto err_dev_free; 1362 1363 /* Handle umasking inside the fuse code */ 1364 if (sb->s_flags & SB_POSIXACL) 1365 fc->dont_mask = 1; 1366 sb->s_flags |= SB_POSIXACL; 1367 1368 fc->default_permissions = ctx->default_permissions; 1369 fc->allow_other = ctx->allow_other; 1370 fc->user_id = ctx->user_id; 1371 fc->group_id = ctx->group_id; 1372 fc->legacy_opts_show = ctx->legacy_opts_show; 1373 fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1374 fc->destroy = ctx->destroy; 1375 fc->no_control = ctx->no_control; 1376 fc->no_force_umount = ctx->no_force_umount; 1377 1378 err = -ENOMEM; 1379 root = fuse_get_root_inode(sb, ctx->rootmode); 1380 sb->s_d_op = &fuse_root_dentry_operations; 1381 root_dentry = d_make_root(root); 1382 if (!root_dentry) 1383 goto err_dev_free; 1384 /* Root dentry doesn't have .d_revalidate */ 1385 sb->s_d_op = &fuse_dentry_operations; 1386 1387 mutex_lock(&fuse_mutex); 1388 err = -EINVAL; 1389 if (ctx->fudptr && *ctx->fudptr) 1390 goto err_unlock; 1391 1392 err = fuse_ctl_add_conn(fc); 1393 if (err) 1394 goto err_unlock; 1395 1396 list_add_tail(&fc->entry, &fuse_conn_list); 1397 sb->s_root = root_dentry; 1398 if (ctx->fudptr) 1399 *ctx->fudptr = fud; 1400 mutex_unlock(&fuse_mutex); 1401 return 0; 1402 1403 err_unlock: 1404 mutex_unlock(&fuse_mutex); 1405 dput(root_dentry); 1406 err_dev_free: 1407 if (fud) 1408 fuse_dev_free(fud); 1409 err_free_dax: 1410 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1411 fuse_dax_conn_free(fc); 1412 err: 1413 return err; 1414 } 1415 EXPORT_SYMBOL_GPL(fuse_fill_super_common); 1416 1417 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) 1418 { 1419 struct fuse_fs_context *ctx = fsc->fs_private; 1420 struct file *file; 1421 int err; 1422 struct fuse_conn *fc; 1423 struct fuse_mount *fm; 1424 1425 err = -EINVAL; 1426 file = fget(ctx->fd); 1427 if (!file) 1428 goto err; 1429 1430 /* 1431 * Require mount to happen from the same user namespace which 1432 * opened /dev/fuse to prevent potential attacks. 1433 */ 1434 if ((file->f_op != &fuse_dev_operations) || 1435 (file->f_cred->user_ns != sb->s_user_ns)) 1436 goto err_fput; 1437 ctx->fudptr = &file->private_data; 1438 1439 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1440 err = -ENOMEM; 1441 if (!fc) 1442 goto err_fput; 1443 1444 fm = kzalloc(sizeof(*fm), GFP_KERNEL); 1445 if (!fm) { 1446 kfree(fc); 1447 goto err_fput; 1448 } 1449 1450 fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1451 fc->release = fuse_free_conn; 1452 1453 sb->s_fs_info = fm; 1454 1455 err = fuse_fill_super_common(sb, ctx); 1456 if (err) 1457 goto err_put_conn; 1458 /* 1459 * atomic_dec_and_test() in fput() provides the necessary 1460 * memory barrier for file->private_data to be visible on all 1461 * CPUs after this 1462 */ 1463 fput(file); 1464 fuse_send_init(get_fuse_mount_super(sb)); 1465 return 0; 1466 1467 err_put_conn: 1468 fuse_mount_put(fm); 1469 sb->s_fs_info = NULL; 1470 err_fput: 1471 fput(file); 1472 err: 1473 return err; 1474 } 1475 1476 static int fuse_get_tree(struct fs_context *fc) 1477 { 1478 struct fuse_fs_context *ctx = fc->fs_private; 1479 1480 if (!ctx->fd_present || !ctx->rootmode_present || 1481 !ctx->user_id_present || !ctx->group_id_present) 1482 return -EINVAL; 1483 1484 #ifdef CONFIG_BLOCK 1485 if (ctx->is_bdev) 1486 return get_tree_bdev(fc, fuse_fill_super); 1487 #endif 1488 1489 return get_tree_nodev(fc, fuse_fill_super); 1490 } 1491 1492 static const struct fs_context_operations fuse_context_ops = { 1493 .free = fuse_free_fc, 1494 .parse_param = fuse_parse_param, 1495 .reconfigure = fuse_reconfigure, 1496 .get_tree = fuse_get_tree, 1497 }; 1498 1499 /* 1500 * Set up the filesystem mount context. 1501 */ 1502 static int fuse_init_fs_context(struct fs_context *fc) 1503 { 1504 struct fuse_fs_context *ctx; 1505 1506 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1507 if (!ctx) 1508 return -ENOMEM; 1509 1510 ctx->max_read = ~0; 1511 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1512 ctx->legacy_opts_show = true; 1513 1514 #ifdef CONFIG_BLOCK 1515 if (fc->fs_type == &fuseblk_fs_type) { 1516 ctx->is_bdev = true; 1517 ctx->destroy = true; 1518 } 1519 #endif 1520 1521 fc->fs_private = ctx; 1522 fc->ops = &fuse_context_ops; 1523 return 0; 1524 } 1525 1526 bool fuse_mount_remove(struct fuse_mount *fm) 1527 { 1528 struct fuse_conn *fc = fm->fc; 1529 bool last = false; 1530 1531 down_write(&fc->killsb); 1532 list_del_init(&fm->fc_entry); 1533 if (list_empty(&fc->mounts)) 1534 last = true; 1535 up_write(&fc->killsb); 1536 1537 return last; 1538 } 1539 EXPORT_SYMBOL_GPL(fuse_mount_remove); 1540 1541 void fuse_conn_destroy(struct fuse_mount *fm) 1542 { 1543 struct fuse_conn *fc = fm->fc; 1544 1545 if (fc->destroy) 1546 fuse_send_destroy(fm); 1547 1548 fuse_abort_conn(fc); 1549 fuse_wait_aborted(fc); 1550 1551 if (!list_empty(&fc->entry)) { 1552 mutex_lock(&fuse_mutex); 1553 list_del(&fc->entry); 1554 fuse_ctl_remove_conn(fc); 1555 mutex_unlock(&fuse_mutex); 1556 } 1557 } 1558 EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1559 1560 static void fuse_kill_sb_anon(struct super_block *sb) 1561 { 1562 struct fuse_mount *fm = get_fuse_mount_super(sb); 1563 bool last; 1564 1565 if (fm) { 1566 last = fuse_mount_remove(fm); 1567 if (last) 1568 fuse_conn_destroy(fm); 1569 } 1570 kill_anon_super(sb); 1571 } 1572 1573 static struct file_system_type fuse_fs_type = { 1574 .owner = THIS_MODULE, 1575 .name = "fuse", 1576 .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, 1577 .init_fs_context = fuse_init_fs_context, 1578 .parameters = fuse_fs_parameters, 1579 .kill_sb = fuse_kill_sb_anon, 1580 }; 1581 MODULE_ALIAS_FS("fuse"); 1582 1583 #ifdef CONFIG_BLOCK 1584 static void fuse_kill_sb_blk(struct super_block *sb) 1585 { 1586 struct fuse_mount *fm = get_fuse_mount_super(sb); 1587 bool last; 1588 1589 if (fm) { 1590 last = fuse_mount_remove(fm); 1591 if (last) 1592 fuse_conn_destroy(fm); 1593 } 1594 kill_block_super(sb); 1595 } 1596 1597 static struct file_system_type fuseblk_fs_type = { 1598 .owner = THIS_MODULE, 1599 .name = "fuseblk", 1600 .init_fs_context = fuse_init_fs_context, 1601 .parameters = fuse_fs_parameters, 1602 .kill_sb = fuse_kill_sb_blk, 1603 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1604 }; 1605 MODULE_ALIAS_FS("fuseblk"); 1606 1607 static inline int register_fuseblk(void) 1608 { 1609 return register_filesystem(&fuseblk_fs_type); 1610 } 1611 1612 static inline void unregister_fuseblk(void) 1613 { 1614 unregister_filesystem(&fuseblk_fs_type); 1615 } 1616 #else 1617 static inline int register_fuseblk(void) 1618 { 1619 return 0; 1620 } 1621 1622 static inline void unregister_fuseblk(void) 1623 { 1624 } 1625 #endif 1626 1627 static void fuse_inode_init_once(void *foo) 1628 { 1629 struct inode *inode = foo; 1630 1631 inode_init_once(inode); 1632 } 1633 1634 static int __init fuse_fs_init(void) 1635 { 1636 int err; 1637 1638 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1639 sizeof(struct fuse_inode), 0, 1640 SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, 1641 fuse_inode_init_once); 1642 err = -ENOMEM; 1643 if (!fuse_inode_cachep) 1644 goto out; 1645 1646 err = register_fuseblk(); 1647 if (err) 1648 goto out2; 1649 1650 err = register_filesystem(&fuse_fs_type); 1651 if (err) 1652 goto out3; 1653 1654 return 0; 1655 1656 out3: 1657 unregister_fuseblk(); 1658 out2: 1659 kmem_cache_destroy(fuse_inode_cachep); 1660 out: 1661 return err; 1662 } 1663 1664 static void fuse_fs_cleanup(void) 1665 { 1666 unregister_filesystem(&fuse_fs_type); 1667 unregister_fuseblk(); 1668 1669 /* 1670 * Make sure all delayed rcu free inodes are flushed before we 1671 * destroy cache. 1672 */ 1673 rcu_barrier(); 1674 kmem_cache_destroy(fuse_inode_cachep); 1675 } 1676 1677 static struct kobject *fuse_kobj; 1678 1679 static int fuse_sysfs_init(void) 1680 { 1681 int err; 1682 1683 fuse_kobj = kobject_create_and_add("fuse", fs_kobj); 1684 if (!fuse_kobj) { 1685 err = -ENOMEM; 1686 goto out_err; 1687 } 1688 1689 err = sysfs_create_mount_point(fuse_kobj, "connections"); 1690 if (err) 1691 goto out_fuse_unregister; 1692 1693 return 0; 1694 1695 out_fuse_unregister: 1696 kobject_put(fuse_kobj); 1697 out_err: 1698 return err; 1699 } 1700 1701 static void fuse_sysfs_cleanup(void) 1702 { 1703 sysfs_remove_mount_point(fuse_kobj, "connections"); 1704 kobject_put(fuse_kobj); 1705 } 1706 1707 static int __init fuse_init(void) 1708 { 1709 int res; 1710 1711 pr_info("init (API version %i.%i)\n", 1712 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 1713 1714 INIT_LIST_HEAD(&fuse_conn_list); 1715 res = fuse_fs_init(); 1716 if (res) 1717 goto err; 1718 1719 res = fuse_dev_init(); 1720 if (res) 1721 goto err_fs_cleanup; 1722 1723 res = fuse_sysfs_init(); 1724 if (res) 1725 goto err_dev_cleanup; 1726 1727 res = fuse_ctl_init(); 1728 if (res) 1729 goto err_sysfs_cleanup; 1730 1731 sanitize_global_limit(&max_user_bgreq); 1732 sanitize_global_limit(&max_user_congthresh); 1733 1734 return 0; 1735 1736 err_sysfs_cleanup: 1737 fuse_sysfs_cleanup(); 1738 err_dev_cleanup: 1739 fuse_dev_cleanup(); 1740 err_fs_cleanup: 1741 fuse_fs_cleanup(); 1742 err: 1743 return res; 1744 } 1745 1746 static void __exit fuse_exit(void) 1747 { 1748 pr_debug("exit\n"); 1749 1750 fuse_ctl_cleanup(); 1751 fuse_sysfs_cleanup(); 1752 fuse_fs_cleanup(); 1753 fuse_dev_cleanup(); 1754 } 1755 1756 module_init(fuse_init); 1757 module_exit(fuse_exit); 1758