1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/file.h> 13 #include <linux/fs_context.h> 14 #include <linux/sched.h> 15 #include <linux/namei.h> 16 #include <linux/slab.h> 17 #include <linux/xattr.h> 18 #include <linux/iversion.h> 19 #include <linux/posix_acl.h> 20 #include <linux/security.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 24 static void fuse_advise_use_readdirplus(struct inode *dir) 25 { 26 struct fuse_inode *fi = get_fuse_inode(dir); 27 28 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); 29 } 30 31 #if BITS_PER_LONG >= 64 32 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) 33 { 34 entry->d_fsdata = (void *) time; 35 } 36 37 static inline u64 fuse_dentry_time(const struct dentry *entry) 38 { 39 return (u64)entry->d_fsdata; 40 } 41 42 #else 43 union fuse_dentry { 44 u64 time; 45 struct rcu_head rcu; 46 }; 47 48 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) 49 { 50 ((union fuse_dentry *) dentry->d_fsdata)->time = time; 51 } 52 53 static inline u64 fuse_dentry_time(const struct dentry *entry) 54 { 55 return ((union fuse_dentry *) entry->d_fsdata)->time; 56 } 57 #endif 58 59 static void fuse_dentry_settime(struct dentry *dentry, u64 time) 60 { 61 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); 62 bool delete = !time && fc->delete_stale; 63 /* 64 * Mess with DCACHE_OP_DELETE because dput() will be faster without it. 65 * Don't care about races, either way it's just an optimization 66 */ 67 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || 68 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { 69 spin_lock(&dentry->d_lock); 70 if (!delete) 71 dentry->d_flags &= ~DCACHE_OP_DELETE; 72 else 73 dentry->d_flags |= DCACHE_OP_DELETE; 74 spin_unlock(&dentry->d_lock); 75 } 76 77 __fuse_dentry_settime(dentry, time); 78 } 79 80 /* 81 * FUSE caches dentries and attributes with separate timeout. The 82 * time in jiffies until the dentry/attributes are valid is stored in 83 * dentry->d_fsdata and fuse_inode->i_time respectively. 84 */ 85 86 /* 87 * Calculate the time in jiffies until a dentry/attributes are valid 88 */ 89 static u64 time_to_jiffies(u64 sec, u32 nsec) 90 { 91 if (sec || nsec) { 92 struct timespec64 ts = { 93 sec, 94 min_t(u32, nsec, NSEC_PER_SEC - 1) 95 }; 96 97 return get_jiffies_64() + timespec64_to_jiffies(&ts); 98 } else 99 return 0; 100 } 101 102 /* 103 * Set dentry and possibly attribute timeouts from the lookup/mk* 104 * replies 105 */ 106 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) 107 { 108 fuse_dentry_settime(entry, 109 time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); 110 } 111 112 static u64 attr_timeout(struct fuse_attr_out *o) 113 { 114 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 115 } 116 117 u64 entry_attr_timeout(struct fuse_entry_out *o) 118 { 119 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 120 } 121 122 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) 123 { 124 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); 125 } 126 127 /* 128 * Mark the attributes as stale, so that at the next call to 129 * ->getattr() they will be fetched from userspace 130 */ 131 void fuse_invalidate_attr(struct inode *inode) 132 { 133 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS); 134 } 135 136 static void fuse_dir_changed(struct inode *dir) 137 { 138 fuse_invalidate_attr(dir); 139 inode_maybe_inc_iversion(dir, false); 140 } 141 142 /** 143 * Mark the attributes as stale due to an atime change. Avoid the invalidate if 144 * atime is not used. 145 */ 146 void fuse_invalidate_atime(struct inode *inode) 147 { 148 if (!IS_RDONLY(inode)) 149 fuse_invalidate_attr_mask(inode, STATX_ATIME); 150 } 151 152 /* 153 * Just mark the entry as stale, so that a next attempt to look it up 154 * will result in a new lookup call to userspace 155 * 156 * This is called when a dentry is about to become negative and the 157 * timeout is unknown (unlink, rmdir, rename and in some cases 158 * lookup) 159 */ 160 void fuse_invalidate_entry_cache(struct dentry *entry) 161 { 162 fuse_dentry_settime(entry, 0); 163 } 164 165 /* 166 * Same as fuse_invalidate_entry_cache(), but also try to remove the 167 * dentry from the hash 168 */ 169 static void fuse_invalidate_entry(struct dentry *entry) 170 { 171 d_invalidate(entry); 172 fuse_invalidate_entry_cache(entry); 173 } 174 175 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, 176 u64 nodeid, const struct qstr *name, 177 struct fuse_entry_out *outarg) 178 { 179 memset(outarg, 0, sizeof(struct fuse_entry_out)); 180 args->opcode = FUSE_LOOKUP; 181 args->nodeid = nodeid; 182 args->in_numargs = 1; 183 args->in_args[0].size = name->len + 1; 184 args->in_args[0].value = name->name; 185 args->out_numargs = 1; 186 args->out_args[0].size = sizeof(struct fuse_entry_out); 187 args->out_args[0].value = outarg; 188 } 189 190 /* 191 * Check whether the dentry is still valid 192 * 193 * If the entry validity timeout has expired and the dentry is 194 * positive, try to redo the lookup. If the lookup results in a 195 * different inode, then let the VFS invalidate the dentry and redo 196 * the lookup once more. If the lookup results in the same inode, 197 * then refresh the attributes, timeouts and mark the dentry valid. 198 */ 199 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) 200 { 201 struct inode *inode; 202 struct dentry *parent; 203 struct fuse_mount *fm; 204 struct fuse_inode *fi; 205 int ret; 206 207 inode = d_inode_rcu(entry); 208 if (inode && fuse_is_bad(inode)) 209 goto invalid; 210 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || 211 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { 212 struct fuse_entry_out outarg; 213 FUSE_ARGS(args); 214 struct fuse_forget_link *forget; 215 u64 attr_version; 216 217 /* For negative dentries, always do a fresh lookup */ 218 if (!inode) 219 goto invalid; 220 221 ret = -ECHILD; 222 if (flags & LOOKUP_RCU) 223 goto out; 224 225 fm = get_fuse_mount(inode); 226 227 forget = fuse_alloc_forget(); 228 ret = -ENOMEM; 229 if (!forget) 230 goto out; 231 232 attr_version = fuse_get_attr_version(fm->fc); 233 234 parent = dget_parent(entry); 235 fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), 236 &entry->d_name, &outarg); 237 ret = fuse_simple_request(fm, &args); 238 dput(parent); 239 /* Zero nodeid is same as -ENOENT */ 240 if (!ret && !outarg.nodeid) 241 ret = -ENOENT; 242 if (!ret) { 243 fi = get_fuse_inode(inode); 244 if (outarg.nodeid != get_node_id(inode) || 245 (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { 246 fuse_queue_forget(fm->fc, forget, 247 outarg.nodeid, 1); 248 goto invalid; 249 } 250 spin_lock(&fi->lock); 251 fi->nlookup++; 252 spin_unlock(&fi->lock); 253 } 254 kfree(forget); 255 if (ret == -ENOMEM) 256 goto out; 257 if (ret || fuse_invalid_attr(&outarg.attr) || 258 fuse_stale_inode(inode, outarg.generation, &outarg.attr)) 259 goto invalid; 260 261 forget_all_cached_acls(inode); 262 fuse_change_attributes(inode, &outarg.attr, 263 entry_attr_timeout(&outarg), 264 attr_version); 265 fuse_change_entry_timeout(entry, &outarg); 266 } else if (inode) { 267 fi = get_fuse_inode(inode); 268 if (flags & LOOKUP_RCU) { 269 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) 270 return -ECHILD; 271 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { 272 parent = dget_parent(entry); 273 fuse_advise_use_readdirplus(d_inode(parent)); 274 dput(parent); 275 } 276 } 277 ret = 1; 278 out: 279 return ret; 280 281 invalid: 282 ret = 0; 283 goto out; 284 } 285 286 #if BITS_PER_LONG < 64 287 static int fuse_dentry_init(struct dentry *dentry) 288 { 289 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), 290 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); 291 292 return dentry->d_fsdata ? 0 : -ENOMEM; 293 } 294 static void fuse_dentry_release(struct dentry *dentry) 295 { 296 union fuse_dentry *fd = dentry->d_fsdata; 297 298 kfree_rcu(fd, rcu); 299 } 300 #endif 301 302 static int fuse_dentry_delete(const struct dentry *dentry) 303 { 304 return time_before64(fuse_dentry_time(dentry), get_jiffies_64()); 305 } 306 307 /* 308 * Create a fuse_mount object with a new superblock (with path->dentry 309 * as the root), and return that mount so it can be auto-mounted on 310 * @path. 311 */ 312 static struct vfsmount *fuse_dentry_automount(struct path *path) 313 { 314 struct fs_context *fsc; 315 struct vfsmount *mnt; 316 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 317 318 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 319 if (IS_ERR(fsc)) 320 return ERR_CAST(fsc); 321 322 /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */ 323 fsc->fs_private = mp_fi; 324 325 /* Create the submount */ 326 mnt = fc_mount(fsc); 327 if (!IS_ERR(mnt)) 328 mntget(mnt); 329 330 put_fs_context(fsc); 331 return mnt; 332 } 333 334 const struct dentry_operations fuse_dentry_operations = { 335 .d_revalidate = fuse_dentry_revalidate, 336 .d_delete = fuse_dentry_delete, 337 #if BITS_PER_LONG < 64 338 .d_init = fuse_dentry_init, 339 .d_release = fuse_dentry_release, 340 #endif 341 .d_automount = fuse_dentry_automount, 342 }; 343 344 const struct dentry_operations fuse_root_dentry_operations = { 345 #if BITS_PER_LONG < 64 346 .d_init = fuse_dentry_init, 347 .d_release = fuse_dentry_release, 348 #endif 349 }; 350 351 int fuse_valid_type(int m) 352 { 353 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || 354 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); 355 } 356 357 bool fuse_invalid_attr(struct fuse_attr *attr) 358 { 359 return !fuse_valid_type(attr->mode) || 360 attr->size > LLONG_MAX; 361 } 362 363 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 364 struct fuse_entry_out *outarg, struct inode **inode) 365 { 366 struct fuse_mount *fm = get_fuse_mount_super(sb); 367 FUSE_ARGS(args); 368 struct fuse_forget_link *forget; 369 u64 attr_version; 370 int err; 371 372 *inode = NULL; 373 err = -ENAMETOOLONG; 374 if (name->len > FUSE_NAME_MAX) 375 goto out; 376 377 378 forget = fuse_alloc_forget(); 379 err = -ENOMEM; 380 if (!forget) 381 goto out; 382 383 attr_version = fuse_get_attr_version(fm->fc); 384 385 fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); 386 err = fuse_simple_request(fm, &args); 387 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 388 if (err || !outarg->nodeid) 389 goto out_put_forget; 390 391 err = -EIO; 392 if (!outarg->nodeid) 393 goto out_put_forget; 394 if (fuse_invalid_attr(&outarg->attr)) 395 goto out_put_forget; 396 397 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, 398 &outarg->attr, entry_attr_timeout(outarg), 399 attr_version); 400 err = -ENOMEM; 401 if (!*inode) { 402 fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); 403 goto out; 404 } 405 err = 0; 406 407 out_put_forget: 408 kfree(forget); 409 out: 410 return err; 411 } 412 413 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 414 unsigned int flags) 415 { 416 int err; 417 struct fuse_entry_out outarg; 418 struct inode *inode; 419 struct dentry *newent; 420 bool outarg_valid = true; 421 bool locked; 422 423 if (fuse_is_bad(dir)) 424 return ERR_PTR(-EIO); 425 426 locked = fuse_lock_inode(dir); 427 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, 428 &outarg, &inode); 429 fuse_unlock_inode(dir, locked); 430 if (err == -ENOENT) { 431 outarg_valid = false; 432 err = 0; 433 } 434 if (err) 435 goto out_err; 436 437 err = -EIO; 438 if (inode && get_node_id(inode) == FUSE_ROOT_ID) 439 goto out_iput; 440 441 newent = d_splice_alias(inode, entry); 442 err = PTR_ERR(newent); 443 if (IS_ERR(newent)) 444 goto out_err; 445 446 entry = newent ? newent : entry; 447 if (outarg_valid) 448 fuse_change_entry_timeout(entry, &outarg); 449 else 450 fuse_invalidate_entry_cache(entry); 451 452 if (inode) 453 fuse_advise_use_readdirplus(dir); 454 return newent; 455 456 out_iput: 457 iput(inode); 458 out_err: 459 return ERR_PTR(err); 460 } 461 462 static int get_security_context(struct dentry *entry, umode_t mode, 463 void **security_ctx, u32 *security_ctxlen) 464 { 465 struct fuse_secctx *fctx; 466 struct fuse_secctx_header *header; 467 void *ctx = NULL, *ptr; 468 u32 ctxlen, total_len = sizeof(*header); 469 int err, nr_ctx = 0; 470 const char *name; 471 size_t namelen; 472 473 err = security_dentry_init_security(entry, mode, &entry->d_name, 474 &name, &ctx, &ctxlen); 475 if (err) { 476 if (err != -EOPNOTSUPP) 477 goto out_err; 478 /* No LSM is supporting this security hook. Ignore error */ 479 ctxlen = 0; 480 ctx = NULL; 481 } 482 483 if (ctxlen) { 484 nr_ctx = 1; 485 namelen = strlen(name) + 1; 486 err = -EIO; 487 if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX)) 488 goto out_err; 489 total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen); 490 } 491 492 err = -ENOMEM; 493 header = ptr = kzalloc(total_len, GFP_KERNEL); 494 if (!ptr) 495 goto out_err; 496 497 header->nr_secctx = nr_ctx; 498 header->size = total_len; 499 ptr += sizeof(*header); 500 if (nr_ctx) { 501 fctx = ptr; 502 fctx->size = ctxlen; 503 ptr += sizeof(*fctx); 504 505 strcpy(ptr, name); 506 ptr += namelen; 507 508 memcpy(ptr, ctx, ctxlen); 509 } 510 *security_ctxlen = total_len; 511 *security_ctx = header; 512 err = 0; 513 out_err: 514 kfree(ctx); 515 return err; 516 } 517 518 /* 519 * Atomic create+open operation 520 * 521 * If the filesystem doesn't support this, then fall back to separate 522 * 'mknod' + 'open' requests. 523 */ 524 static int fuse_create_open(struct inode *dir, struct dentry *entry, 525 struct file *file, unsigned int flags, 526 umode_t mode) 527 { 528 int err; 529 struct inode *inode; 530 struct fuse_mount *fm = get_fuse_mount(dir); 531 FUSE_ARGS(args); 532 struct fuse_forget_link *forget; 533 struct fuse_create_in inarg; 534 struct fuse_open_out outopen; 535 struct fuse_entry_out outentry; 536 struct fuse_inode *fi; 537 struct fuse_file *ff; 538 void *security_ctx = NULL; 539 u32 security_ctxlen; 540 541 /* Userspace expects S_IFREG in create mode */ 542 BUG_ON((mode & S_IFMT) != S_IFREG); 543 544 forget = fuse_alloc_forget(); 545 err = -ENOMEM; 546 if (!forget) 547 goto out_err; 548 549 err = -ENOMEM; 550 ff = fuse_file_alloc(fm); 551 if (!ff) 552 goto out_put_forget_req; 553 554 if (!fm->fc->dont_mask) 555 mode &= ~current_umask(); 556 557 flags &= ~O_NOCTTY; 558 memset(&inarg, 0, sizeof(inarg)); 559 memset(&outentry, 0, sizeof(outentry)); 560 inarg.flags = flags; 561 inarg.mode = mode; 562 inarg.umask = current_umask(); 563 564 if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) && 565 !(flags & O_EXCL) && !capable(CAP_FSETID)) { 566 inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; 567 } 568 569 args.opcode = FUSE_CREATE; 570 args.nodeid = get_node_id(dir); 571 args.in_numargs = 2; 572 args.in_args[0].size = sizeof(inarg); 573 args.in_args[0].value = &inarg; 574 args.in_args[1].size = entry->d_name.len + 1; 575 args.in_args[1].value = entry->d_name.name; 576 args.out_numargs = 2; 577 args.out_args[0].size = sizeof(outentry); 578 args.out_args[0].value = &outentry; 579 args.out_args[1].size = sizeof(outopen); 580 args.out_args[1].value = &outopen; 581 582 if (fm->fc->init_security) { 583 err = get_security_context(entry, mode, &security_ctx, 584 &security_ctxlen); 585 if (err) 586 goto out_put_forget_req; 587 588 args.in_numargs = 3; 589 args.in_args[2].size = security_ctxlen; 590 args.in_args[2].value = security_ctx; 591 } 592 593 err = fuse_simple_request(fm, &args); 594 kfree(security_ctx); 595 if (err) 596 goto out_free_ff; 597 598 err = -EIO; 599 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || 600 fuse_invalid_attr(&outentry.attr)) 601 goto out_free_ff; 602 603 ff->fh = outopen.fh; 604 ff->nodeid = outentry.nodeid; 605 ff->open_flags = outopen.open_flags; 606 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, 607 &outentry.attr, entry_attr_timeout(&outentry), 0); 608 if (!inode) { 609 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 610 fuse_sync_release(NULL, ff, flags); 611 fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); 612 err = -ENOMEM; 613 goto out_err; 614 } 615 kfree(forget); 616 d_instantiate(entry, inode); 617 fuse_change_entry_timeout(entry, &outentry); 618 fuse_dir_changed(dir); 619 err = finish_open(file, entry, generic_file_open); 620 if (err) { 621 fi = get_fuse_inode(inode); 622 fuse_sync_release(fi, ff, flags); 623 } else { 624 file->private_data = ff; 625 fuse_finish_open(inode, file); 626 } 627 return err; 628 629 out_free_ff: 630 fuse_file_free(ff); 631 out_put_forget_req: 632 kfree(forget); 633 out_err: 634 return err; 635 } 636 637 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *, 638 umode_t, dev_t); 639 static int fuse_atomic_open(struct inode *dir, struct dentry *entry, 640 struct file *file, unsigned flags, 641 umode_t mode) 642 { 643 int err; 644 struct fuse_conn *fc = get_fuse_conn(dir); 645 struct dentry *res = NULL; 646 647 if (fuse_is_bad(dir)) 648 return -EIO; 649 650 if (d_in_lookup(entry)) { 651 res = fuse_lookup(dir, entry, 0); 652 if (IS_ERR(res)) 653 return PTR_ERR(res); 654 655 if (res) 656 entry = res; 657 } 658 659 if (!(flags & O_CREAT) || d_really_is_positive(entry)) 660 goto no_open; 661 662 /* Only creates */ 663 file->f_mode |= FMODE_CREATED; 664 665 if (fc->no_create) 666 goto mknod; 667 668 err = fuse_create_open(dir, entry, file, flags, mode); 669 if (err == -ENOSYS) { 670 fc->no_create = 1; 671 goto mknod; 672 } 673 out_dput: 674 dput(res); 675 return err; 676 677 mknod: 678 err = fuse_mknod(&init_user_ns, dir, entry, mode, 0); 679 if (err) 680 goto out_dput; 681 no_open: 682 return finish_no_open(file, res); 683 } 684 685 /* 686 * Code shared between mknod, mkdir, symlink and link 687 */ 688 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, 689 struct inode *dir, struct dentry *entry, 690 umode_t mode) 691 { 692 struct fuse_entry_out outarg; 693 struct inode *inode; 694 struct dentry *d; 695 int err; 696 struct fuse_forget_link *forget; 697 void *security_ctx = NULL; 698 u32 security_ctxlen; 699 700 if (fuse_is_bad(dir)) 701 return -EIO; 702 703 forget = fuse_alloc_forget(); 704 if (!forget) 705 return -ENOMEM; 706 707 memset(&outarg, 0, sizeof(outarg)); 708 args->nodeid = get_node_id(dir); 709 args->out_numargs = 1; 710 args->out_args[0].size = sizeof(outarg); 711 args->out_args[0].value = &outarg; 712 713 if (fm->fc->init_security && args->opcode != FUSE_LINK) { 714 err = get_security_context(entry, mode, &security_ctx, 715 &security_ctxlen); 716 if (err) 717 goto out_put_forget_req; 718 719 BUG_ON(args->in_numargs != 2); 720 721 args->in_numargs = 3; 722 args->in_args[2].size = security_ctxlen; 723 args->in_args[2].value = security_ctx; 724 } 725 726 err = fuse_simple_request(fm, args); 727 kfree(security_ctx); 728 if (err) 729 goto out_put_forget_req; 730 731 err = -EIO; 732 if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) 733 goto out_put_forget_req; 734 735 if ((outarg.attr.mode ^ mode) & S_IFMT) 736 goto out_put_forget_req; 737 738 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 739 &outarg.attr, entry_attr_timeout(&outarg), 0); 740 if (!inode) { 741 fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); 742 return -ENOMEM; 743 } 744 kfree(forget); 745 746 d_drop(entry); 747 d = d_splice_alias(inode, entry); 748 if (IS_ERR(d)) 749 return PTR_ERR(d); 750 751 if (d) { 752 fuse_change_entry_timeout(d, &outarg); 753 dput(d); 754 } else { 755 fuse_change_entry_timeout(entry, &outarg); 756 } 757 fuse_dir_changed(dir); 758 return 0; 759 760 out_put_forget_req: 761 kfree(forget); 762 return err; 763 } 764 765 static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir, 766 struct dentry *entry, umode_t mode, dev_t rdev) 767 { 768 struct fuse_mknod_in inarg; 769 struct fuse_mount *fm = get_fuse_mount(dir); 770 FUSE_ARGS(args); 771 772 if (!fm->fc->dont_mask) 773 mode &= ~current_umask(); 774 775 memset(&inarg, 0, sizeof(inarg)); 776 inarg.mode = mode; 777 inarg.rdev = new_encode_dev(rdev); 778 inarg.umask = current_umask(); 779 args.opcode = FUSE_MKNOD; 780 args.in_numargs = 2; 781 args.in_args[0].size = sizeof(inarg); 782 args.in_args[0].value = &inarg; 783 args.in_args[1].size = entry->d_name.len + 1; 784 args.in_args[1].value = entry->d_name.name; 785 return create_new_entry(fm, &args, dir, entry, mode); 786 } 787 788 static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir, 789 struct dentry *entry, umode_t mode, bool excl) 790 { 791 return fuse_mknod(&init_user_ns, dir, entry, mode, 0); 792 } 793 794 static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir, 795 struct dentry *entry, umode_t mode) 796 { 797 struct fuse_mkdir_in inarg; 798 struct fuse_mount *fm = get_fuse_mount(dir); 799 FUSE_ARGS(args); 800 801 if (!fm->fc->dont_mask) 802 mode &= ~current_umask(); 803 804 memset(&inarg, 0, sizeof(inarg)); 805 inarg.mode = mode; 806 inarg.umask = current_umask(); 807 args.opcode = FUSE_MKDIR; 808 args.in_numargs = 2; 809 args.in_args[0].size = sizeof(inarg); 810 args.in_args[0].value = &inarg; 811 args.in_args[1].size = entry->d_name.len + 1; 812 args.in_args[1].value = entry->d_name.name; 813 return create_new_entry(fm, &args, dir, entry, S_IFDIR); 814 } 815 816 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir, 817 struct dentry *entry, const char *link) 818 { 819 struct fuse_mount *fm = get_fuse_mount(dir); 820 unsigned len = strlen(link) + 1; 821 FUSE_ARGS(args); 822 823 args.opcode = FUSE_SYMLINK; 824 args.in_numargs = 2; 825 args.in_args[0].size = entry->d_name.len + 1; 826 args.in_args[0].value = entry->d_name.name; 827 args.in_args[1].size = len; 828 args.in_args[1].value = link; 829 return create_new_entry(fm, &args, dir, entry, S_IFLNK); 830 } 831 832 void fuse_flush_time_update(struct inode *inode) 833 { 834 int err = sync_inode_metadata(inode, 1); 835 836 mapping_set_error(inode->i_mapping, err); 837 } 838 839 static void fuse_update_ctime_in_cache(struct inode *inode) 840 { 841 if (!IS_NOCMTIME(inode)) { 842 inode->i_ctime = current_time(inode); 843 mark_inode_dirty_sync(inode); 844 fuse_flush_time_update(inode); 845 } 846 } 847 848 void fuse_update_ctime(struct inode *inode) 849 { 850 fuse_invalidate_attr_mask(inode, STATX_CTIME); 851 fuse_update_ctime_in_cache(inode); 852 } 853 854 static void fuse_entry_unlinked(struct dentry *entry) 855 { 856 struct inode *inode = d_inode(entry); 857 struct fuse_conn *fc = get_fuse_conn(inode); 858 struct fuse_inode *fi = get_fuse_inode(inode); 859 860 spin_lock(&fi->lock); 861 fi->attr_version = atomic64_inc_return(&fc->attr_version); 862 /* 863 * If i_nlink == 0 then unlink doesn't make sense, yet this can 864 * happen if userspace filesystem is careless. It would be 865 * difficult to enforce correct nlink usage so just ignore this 866 * condition here 867 */ 868 if (S_ISDIR(inode->i_mode)) 869 clear_nlink(inode); 870 else if (inode->i_nlink > 0) 871 drop_nlink(inode); 872 spin_unlock(&fi->lock); 873 fuse_invalidate_entry_cache(entry); 874 fuse_update_ctime(inode); 875 } 876 877 static int fuse_unlink(struct inode *dir, struct dentry *entry) 878 { 879 int err; 880 struct fuse_mount *fm = get_fuse_mount(dir); 881 FUSE_ARGS(args); 882 883 if (fuse_is_bad(dir)) 884 return -EIO; 885 886 args.opcode = FUSE_UNLINK; 887 args.nodeid = get_node_id(dir); 888 args.in_numargs = 1; 889 args.in_args[0].size = entry->d_name.len + 1; 890 args.in_args[0].value = entry->d_name.name; 891 err = fuse_simple_request(fm, &args); 892 if (!err) { 893 fuse_dir_changed(dir); 894 fuse_entry_unlinked(entry); 895 } else if (err == -EINTR) 896 fuse_invalidate_entry(entry); 897 return err; 898 } 899 900 static int fuse_rmdir(struct inode *dir, struct dentry *entry) 901 { 902 int err; 903 struct fuse_mount *fm = get_fuse_mount(dir); 904 FUSE_ARGS(args); 905 906 if (fuse_is_bad(dir)) 907 return -EIO; 908 909 args.opcode = FUSE_RMDIR; 910 args.nodeid = get_node_id(dir); 911 args.in_numargs = 1; 912 args.in_args[0].size = entry->d_name.len + 1; 913 args.in_args[0].value = entry->d_name.name; 914 err = fuse_simple_request(fm, &args); 915 if (!err) { 916 fuse_dir_changed(dir); 917 fuse_entry_unlinked(entry); 918 } else if (err == -EINTR) 919 fuse_invalidate_entry(entry); 920 return err; 921 } 922 923 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, 924 struct inode *newdir, struct dentry *newent, 925 unsigned int flags, int opcode, size_t argsize) 926 { 927 int err; 928 struct fuse_rename2_in inarg; 929 struct fuse_mount *fm = get_fuse_mount(olddir); 930 FUSE_ARGS(args); 931 932 memset(&inarg, 0, argsize); 933 inarg.newdir = get_node_id(newdir); 934 inarg.flags = flags; 935 args.opcode = opcode; 936 args.nodeid = get_node_id(olddir); 937 args.in_numargs = 3; 938 args.in_args[0].size = argsize; 939 args.in_args[0].value = &inarg; 940 args.in_args[1].size = oldent->d_name.len + 1; 941 args.in_args[1].value = oldent->d_name.name; 942 args.in_args[2].size = newent->d_name.len + 1; 943 args.in_args[2].value = newent->d_name.name; 944 err = fuse_simple_request(fm, &args); 945 if (!err) { 946 /* ctime changes */ 947 fuse_update_ctime(d_inode(oldent)); 948 949 if (flags & RENAME_EXCHANGE) 950 fuse_update_ctime(d_inode(newent)); 951 952 fuse_dir_changed(olddir); 953 if (olddir != newdir) 954 fuse_dir_changed(newdir); 955 956 /* newent will end up negative */ 957 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) 958 fuse_entry_unlinked(newent); 959 } else if (err == -EINTR) { 960 /* If request was interrupted, DEITY only knows if the 961 rename actually took place. If the invalidation 962 fails (e.g. some process has CWD under the renamed 963 directory), then there can be inconsistency between 964 the dcache and the real filesystem. Tough luck. */ 965 fuse_invalidate_entry(oldent); 966 if (d_really_is_positive(newent)) 967 fuse_invalidate_entry(newent); 968 } 969 970 return err; 971 } 972 973 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir, 974 struct dentry *oldent, struct inode *newdir, 975 struct dentry *newent, unsigned int flags) 976 { 977 struct fuse_conn *fc = get_fuse_conn(olddir); 978 int err; 979 980 if (fuse_is_bad(olddir)) 981 return -EIO; 982 983 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) 984 return -EINVAL; 985 986 if (flags) { 987 if (fc->no_rename2 || fc->minor < 23) 988 return -EINVAL; 989 990 err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 991 FUSE_RENAME2, 992 sizeof(struct fuse_rename2_in)); 993 if (err == -ENOSYS) { 994 fc->no_rename2 = 1; 995 err = -EINVAL; 996 } 997 } else { 998 err = fuse_rename_common(olddir, oldent, newdir, newent, 0, 999 FUSE_RENAME, 1000 sizeof(struct fuse_rename_in)); 1001 } 1002 1003 return err; 1004 } 1005 1006 static int fuse_link(struct dentry *entry, struct inode *newdir, 1007 struct dentry *newent) 1008 { 1009 int err; 1010 struct fuse_link_in inarg; 1011 struct inode *inode = d_inode(entry); 1012 struct fuse_mount *fm = get_fuse_mount(inode); 1013 FUSE_ARGS(args); 1014 1015 memset(&inarg, 0, sizeof(inarg)); 1016 inarg.oldnodeid = get_node_id(inode); 1017 args.opcode = FUSE_LINK; 1018 args.in_numargs = 2; 1019 args.in_args[0].size = sizeof(inarg); 1020 args.in_args[0].value = &inarg; 1021 args.in_args[1].size = newent->d_name.len + 1; 1022 args.in_args[1].value = newent->d_name.name; 1023 err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); 1024 if (!err) 1025 fuse_update_ctime_in_cache(inode); 1026 else if (err == -EINTR) 1027 fuse_invalidate_attr(inode); 1028 1029 return err; 1030 } 1031 1032 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, 1033 struct kstat *stat) 1034 { 1035 unsigned int blkbits; 1036 struct fuse_conn *fc = get_fuse_conn(inode); 1037 1038 stat->dev = inode->i_sb->s_dev; 1039 stat->ino = attr->ino; 1040 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 1041 stat->nlink = attr->nlink; 1042 stat->uid = make_kuid(fc->user_ns, attr->uid); 1043 stat->gid = make_kgid(fc->user_ns, attr->gid); 1044 stat->rdev = inode->i_rdev; 1045 stat->atime.tv_sec = attr->atime; 1046 stat->atime.tv_nsec = attr->atimensec; 1047 stat->mtime.tv_sec = attr->mtime; 1048 stat->mtime.tv_nsec = attr->mtimensec; 1049 stat->ctime.tv_sec = attr->ctime; 1050 stat->ctime.tv_nsec = attr->ctimensec; 1051 stat->size = attr->size; 1052 stat->blocks = attr->blocks; 1053 1054 if (attr->blksize != 0) 1055 blkbits = ilog2(attr->blksize); 1056 else 1057 blkbits = inode->i_sb->s_blocksize_bits; 1058 1059 stat->blksize = 1 << blkbits; 1060 } 1061 1062 static int fuse_do_getattr(struct inode *inode, struct kstat *stat, 1063 struct file *file) 1064 { 1065 int err; 1066 struct fuse_getattr_in inarg; 1067 struct fuse_attr_out outarg; 1068 struct fuse_mount *fm = get_fuse_mount(inode); 1069 FUSE_ARGS(args); 1070 u64 attr_version; 1071 1072 attr_version = fuse_get_attr_version(fm->fc); 1073 1074 memset(&inarg, 0, sizeof(inarg)); 1075 memset(&outarg, 0, sizeof(outarg)); 1076 /* Directories have separate file-handle space */ 1077 if (file && S_ISREG(inode->i_mode)) { 1078 struct fuse_file *ff = file->private_data; 1079 1080 inarg.getattr_flags |= FUSE_GETATTR_FH; 1081 inarg.fh = ff->fh; 1082 } 1083 args.opcode = FUSE_GETATTR; 1084 args.nodeid = get_node_id(inode); 1085 args.in_numargs = 1; 1086 args.in_args[0].size = sizeof(inarg); 1087 args.in_args[0].value = &inarg; 1088 args.out_numargs = 1; 1089 args.out_args[0].size = sizeof(outarg); 1090 args.out_args[0].value = &outarg; 1091 err = fuse_simple_request(fm, &args); 1092 if (!err) { 1093 if (fuse_invalid_attr(&outarg.attr) || 1094 inode_wrong_type(inode, outarg.attr.mode)) { 1095 fuse_make_bad(inode); 1096 err = -EIO; 1097 } else { 1098 fuse_change_attributes(inode, &outarg.attr, 1099 attr_timeout(&outarg), 1100 attr_version); 1101 if (stat) 1102 fuse_fillattr(inode, &outarg.attr, stat); 1103 } 1104 } 1105 return err; 1106 } 1107 1108 static int fuse_update_get_attr(struct inode *inode, struct file *file, 1109 struct kstat *stat, u32 request_mask, 1110 unsigned int flags) 1111 { 1112 struct fuse_inode *fi = get_fuse_inode(inode); 1113 int err = 0; 1114 bool sync; 1115 u32 inval_mask = READ_ONCE(fi->inval_mask); 1116 u32 cache_mask = fuse_get_cache_mask(inode); 1117 1118 if (flags & AT_STATX_FORCE_SYNC) 1119 sync = true; 1120 else if (flags & AT_STATX_DONT_SYNC) 1121 sync = false; 1122 else if (request_mask & inval_mask & ~cache_mask) 1123 sync = true; 1124 else 1125 sync = time_before64(fi->i_time, get_jiffies_64()); 1126 1127 if (sync) { 1128 forget_all_cached_acls(inode); 1129 err = fuse_do_getattr(inode, stat, file); 1130 } else if (stat) { 1131 generic_fillattr(&init_user_ns, inode, stat); 1132 stat->mode = fi->orig_i_mode; 1133 stat->ino = fi->orig_ino; 1134 } 1135 1136 return err; 1137 } 1138 1139 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) 1140 { 1141 return fuse_update_get_attr(inode, file, NULL, mask, 0); 1142 } 1143 1144 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, 1145 u64 child_nodeid, struct qstr *name) 1146 { 1147 int err = -ENOTDIR; 1148 struct inode *parent; 1149 struct dentry *dir; 1150 struct dentry *entry; 1151 1152 parent = fuse_ilookup(fc, parent_nodeid, NULL); 1153 if (!parent) 1154 return -ENOENT; 1155 1156 inode_lock_nested(parent, I_MUTEX_PARENT); 1157 if (!S_ISDIR(parent->i_mode)) 1158 goto unlock; 1159 1160 err = -ENOENT; 1161 dir = d_find_alias(parent); 1162 if (!dir) 1163 goto unlock; 1164 1165 name->hash = full_name_hash(dir, name->name, name->len); 1166 entry = d_lookup(dir, name); 1167 dput(dir); 1168 if (!entry) 1169 goto unlock; 1170 1171 fuse_dir_changed(parent); 1172 fuse_invalidate_entry(entry); 1173 1174 if (child_nodeid != 0 && d_really_is_positive(entry)) { 1175 inode_lock(d_inode(entry)); 1176 if (get_node_id(d_inode(entry)) != child_nodeid) { 1177 err = -ENOENT; 1178 goto badentry; 1179 } 1180 if (d_mountpoint(entry)) { 1181 err = -EBUSY; 1182 goto badentry; 1183 } 1184 if (d_is_dir(entry)) { 1185 shrink_dcache_parent(entry); 1186 if (!simple_empty(entry)) { 1187 err = -ENOTEMPTY; 1188 goto badentry; 1189 } 1190 d_inode(entry)->i_flags |= S_DEAD; 1191 } 1192 dont_mount(entry); 1193 clear_nlink(d_inode(entry)); 1194 err = 0; 1195 badentry: 1196 inode_unlock(d_inode(entry)); 1197 if (!err) 1198 d_delete(entry); 1199 } else { 1200 err = 0; 1201 } 1202 dput(entry); 1203 1204 unlock: 1205 inode_unlock(parent); 1206 iput(parent); 1207 return err; 1208 } 1209 1210 /* 1211 * Calling into a user-controlled filesystem gives the filesystem 1212 * daemon ptrace-like capabilities over the current process. This 1213 * means, that the filesystem daemon is able to record the exact 1214 * filesystem operations performed, and can also control the behavior 1215 * of the requester process in otherwise impossible ways. For example 1216 * it can delay the operation for arbitrary length of time allowing 1217 * DoS against the requester. 1218 * 1219 * For this reason only those processes can call into the filesystem, 1220 * for which the owner of the mount has ptrace privilege. This 1221 * excludes processes started by other users, suid or sgid processes. 1222 */ 1223 int fuse_allow_current_process(struct fuse_conn *fc) 1224 { 1225 const struct cred *cred; 1226 1227 if (fc->allow_other) 1228 return current_in_userns(fc->user_ns); 1229 1230 cred = current_cred(); 1231 if (uid_eq(cred->euid, fc->user_id) && 1232 uid_eq(cred->suid, fc->user_id) && 1233 uid_eq(cred->uid, fc->user_id) && 1234 gid_eq(cred->egid, fc->group_id) && 1235 gid_eq(cred->sgid, fc->group_id) && 1236 gid_eq(cred->gid, fc->group_id)) 1237 return 1; 1238 1239 return 0; 1240 } 1241 1242 static int fuse_access(struct inode *inode, int mask) 1243 { 1244 struct fuse_mount *fm = get_fuse_mount(inode); 1245 FUSE_ARGS(args); 1246 struct fuse_access_in inarg; 1247 int err; 1248 1249 BUG_ON(mask & MAY_NOT_BLOCK); 1250 1251 if (fm->fc->no_access) 1252 return 0; 1253 1254 memset(&inarg, 0, sizeof(inarg)); 1255 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); 1256 args.opcode = FUSE_ACCESS; 1257 args.nodeid = get_node_id(inode); 1258 args.in_numargs = 1; 1259 args.in_args[0].size = sizeof(inarg); 1260 args.in_args[0].value = &inarg; 1261 err = fuse_simple_request(fm, &args); 1262 if (err == -ENOSYS) { 1263 fm->fc->no_access = 1; 1264 err = 0; 1265 } 1266 return err; 1267 } 1268 1269 static int fuse_perm_getattr(struct inode *inode, int mask) 1270 { 1271 if (mask & MAY_NOT_BLOCK) 1272 return -ECHILD; 1273 1274 forget_all_cached_acls(inode); 1275 return fuse_do_getattr(inode, NULL, NULL); 1276 } 1277 1278 /* 1279 * Check permission. The two basic access models of FUSE are: 1280 * 1281 * 1) Local access checking ('default_permissions' mount option) based 1282 * on file mode. This is the plain old disk filesystem permission 1283 * modell. 1284 * 1285 * 2) "Remote" access checking, where server is responsible for 1286 * checking permission in each inode operation. An exception to this 1287 * is if ->permission() was invoked from sys_access() in which case an 1288 * access request is sent. Execute permission is still checked 1289 * locally based on file mode. 1290 */ 1291 static int fuse_permission(struct user_namespace *mnt_userns, 1292 struct inode *inode, int mask) 1293 { 1294 struct fuse_conn *fc = get_fuse_conn(inode); 1295 bool refreshed = false; 1296 int err = 0; 1297 1298 if (fuse_is_bad(inode)) 1299 return -EIO; 1300 1301 if (!fuse_allow_current_process(fc)) 1302 return -EACCES; 1303 1304 /* 1305 * If attributes are needed, refresh them before proceeding 1306 */ 1307 if (fc->default_permissions || 1308 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1309 struct fuse_inode *fi = get_fuse_inode(inode); 1310 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; 1311 1312 if (perm_mask & READ_ONCE(fi->inval_mask) || 1313 time_before64(fi->i_time, get_jiffies_64())) { 1314 refreshed = true; 1315 1316 err = fuse_perm_getattr(inode, mask); 1317 if (err) 1318 return err; 1319 } 1320 } 1321 1322 if (fc->default_permissions) { 1323 err = generic_permission(&init_user_ns, inode, mask); 1324 1325 /* If permission is denied, try to refresh file 1326 attributes. This is also needed, because the root 1327 node will at first have no permissions */ 1328 if (err == -EACCES && !refreshed) { 1329 err = fuse_perm_getattr(inode, mask); 1330 if (!err) 1331 err = generic_permission(&init_user_ns, 1332 inode, mask); 1333 } 1334 1335 /* Note: the opposite of the above test does not 1336 exist. So if permissions are revoked this won't be 1337 noticed immediately, only after the attribute 1338 timeout has expired */ 1339 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1340 err = fuse_access(inode, mask); 1341 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1342 if (!(inode->i_mode & S_IXUGO)) { 1343 if (refreshed) 1344 return -EACCES; 1345 1346 err = fuse_perm_getattr(inode, mask); 1347 if (!err && !(inode->i_mode & S_IXUGO)) 1348 return -EACCES; 1349 } 1350 } 1351 return err; 1352 } 1353 1354 static int fuse_readlink_page(struct inode *inode, struct page *page) 1355 { 1356 struct fuse_mount *fm = get_fuse_mount(inode); 1357 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1358 struct fuse_args_pages ap = { 1359 .num_pages = 1, 1360 .pages = &page, 1361 .descs = &desc, 1362 }; 1363 char *link; 1364 ssize_t res; 1365 1366 ap.args.opcode = FUSE_READLINK; 1367 ap.args.nodeid = get_node_id(inode); 1368 ap.args.out_pages = true; 1369 ap.args.out_argvar = true; 1370 ap.args.page_zeroing = true; 1371 ap.args.out_numargs = 1; 1372 ap.args.out_args[0].size = desc.length; 1373 res = fuse_simple_request(fm, &ap.args); 1374 1375 fuse_invalidate_atime(inode); 1376 1377 if (res < 0) 1378 return res; 1379 1380 if (WARN_ON(res >= PAGE_SIZE)) 1381 return -EIO; 1382 1383 link = page_address(page); 1384 link[res] = '\0'; 1385 1386 return 0; 1387 } 1388 1389 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, 1390 struct delayed_call *callback) 1391 { 1392 struct fuse_conn *fc = get_fuse_conn(inode); 1393 struct page *page; 1394 int err; 1395 1396 err = -EIO; 1397 if (fuse_is_bad(inode)) 1398 goto out_err; 1399 1400 if (fc->cache_symlinks) 1401 return page_get_link(dentry, inode, callback); 1402 1403 err = -ECHILD; 1404 if (!dentry) 1405 goto out_err; 1406 1407 page = alloc_page(GFP_KERNEL); 1408 err = -ENOMEM; 1409 if (!page) 1410 goto out_err; 1411 1412 err = fuse_readlink_page(inode, page); 1413 if (err) { 1414 __free_page(page); 1415 goto out_err; 1416 } 1417 1418 set_delayed_call(callback, page_put_link, page); 1419 1420 return page_address(page); 1421 1422 out_err: 1423 return ERR_PTR(err); 1424 } 1425 1426 static int fuse_dir_open(struct inode *inode, struct file *file) 1427 { 1428 return fuse_open_common(inode, file, true); 1429 } 1430 1431 static int fuse_dir_release(struct inode *inode, struct file *file) 1432 { 1433 fuse_release_common(file, true); 1434 1435 return 0; 1436 } 1437 1438 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, 1439 int datasync) 1440 { 1441 struct inode *inode = file->f_mapping->host; 1442 struct fuse_conn *fc = get_fuse_conn(inode); 1443 int err; 1444 1445 if (fuse_is_bad(inode)) 1446 return -EIO; 1447 1448 if (fc->no_fsyncdir) 1449 return 0; 1450 1451 inode_lock(inode); 1452 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR); 1453 if (err == -ENOSYS) { 1454 fc->no_fsyncdir = 1; 1455 err = 0; 1456 } 1457 inode_unlock(inode); 1458 1459 return err; 1460 } 1461 1462 static long fuse_dir_ioctl(struct file *file, unsigned int cmd, 1463 unsigned long arg) 1464 { 1465 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1466 1467 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ 1468 if (fc->minor < 18) 1469 return -ENOTTY; 1470 1471 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); 1472 } 1473 1474 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, 1475 unsigned long arg) 1476 { 1477 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1478 1479 if (fc->minor < 18) 1480 return -ENOTTY; 1481 1482 return fuse_ioctl_common(file, cmd, arg, 1483 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); 1484 } 1485 1486 static bool update_mtime(unsigned ivalid, bool trust_local_mtime) 1487 { 1488 /* Always update if mtime is explicitly set */ 1489 if (ivalid & ATTR_MTIME_SET) 1490 return true; 1491 1492 /* Or if kernel i_mtime is the official one */ 1493 if (trust_local_mtime) 1494 return true; 1495 1496 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ 1497 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) 1498 return false; 1499 1500 /* In all other cases update */ 1501 return true; 1502 } 1503 1504 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, 1505 struct fuse_setattr_in *arg, bool trust_local_cmtime) 1506 { 1507 unsigned ivalid = iattr->ia_valid; 1508 1509 if (ivalid & ATTR_MODE) 1510 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1511 if (ivalid & ATTR_UID) 1512 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); 1513 if (ivalid & ATTR_GID) 1514 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); 1515 if (ivalid & ATTR_SIZE) 1516 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1517 if (ivalid & ATTR_ATIME) { 1518 arg->valid |= FATTR_ATIME; 1519 arg->atime = iattr->ia_atime.tv_sec; 1520 arg->atimensec = iattr->ia_atime.tv_nsec; 1521 if (!(ivalid & ATTR_ATIME_SET)) 1522 arg->valid |= FATTR_ATIME_NOW; 1523 } 1524 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { 1525 arg->valid |= FATTR_MTIME; 1526 arg->mtime = iattr->ia_mtime.tv_sec; 1527 arg->mtimensec = iattr->ia_mtime.tv_nsec; 1528 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) 1529 arg->valid |= FATTR_MTIME_NOW; 1530 } 1531 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { 1532 arg->valid |= FATTR_CTIME; 1533 arg->ctime = iattr->ia_ctime.tv_sec; 1534 arg->ctimensec = iattr->ia_ctime.tv_nsec; 1535 } 1536 } 1537 1538 /* 1539 * Prevent concurrent writepages on inode 1540 * 1541 * This is done by adding a negative bias to the inode write counter 1542 * and waiting for all pending writes to finish. 1543 */ 1544 void fuse_set_nowrite(struct inode *inode) 1545 { 1546 struct fuse_inode *fi = get_fuse_inode(inode); 1547 1548 BUG_ON(!inode_is_locked(inode)); 1549 1550 spin_lock(&fi->lock); 1551 BUG_ON(fi->writectr < 0); 1552 fi->writectr += FUSE_NOWRITE; 1553 spin_unlock(&fi->lock); 1554 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); 1555 } 1556 1557 /* 1558 * Allow writepages on inode 1559 * 1560 * Remove the bias from the writecounter and send any queued 1561 * writepages. 1562 */ 1563 static void __fuse_release_nowrite(struct inode *inode) 1564 { 1565 struct fuse_inode *fi = get_fuse_inode(inode); 1566 1567 BUG_ON(fi->writectr != FUSE_NOWRITE); 1568 fi->writectr = 0; 1569 fuse_flush_writepages(inode); 1570 } 1571 1572 void fuse_release_nowrite(struct inode *inode) 1573 { 1574 struct fuse_inode *fi = get_fuse_inode(inode); 1575 1576 spin_lock(&fi->lock); 1577 __fuse_release_nowrite(inode); 1578 spin_unlock(&fi->lock); 1579 } 1580 1581 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, 1582 struct inode *inode, 1583 struct fuse_setattr_in *inarg_p, 1584 struct fuse_attr_out *outarg_p) 1585 { 1586 args->opcode = FUSE_SETATTR; 1587 args->nodeid = get_node_id(inode); 1588 args->in_numargs = 1; 1589 args->in_args[0].size = sizeof(*inarg_p); 1590 args->in_args[0].value = inarg_p; 1591 args->out_numargs = 1; 1592 args->out_args[0].size = sizeof(*outarg_p); 1593 args->out_args[0].value = outarg_p; 1594 } 1595 1596 /* 1597 * Flush inode->i_mtime to the server 1598 */ 1599 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1600 { 1601 struct fuse_mount *fm = get_fuse_mount(inode); 1602 FUSE_ARGS(args); 1603 struct fuse_setattr_in inarg; 1604 struct fuse_attr_out outarg; 1605 1606 memset(&inarg, 0, sizeof(inarg)); 1607 memset(&outarg, 0, sizeof(outarg)); 1608 1609 inarg.valid = FATTR_MTIME; 1610 inarg.mtime = inode->i_mtime.tv_sec; 1611 inarg.mtimensec = inode->i_mtime.tv_nsec; 1612 if (fm->fc->minor >= 23) { 1613 inarg.valid |= FATTR_CTIME; 1614 inarg.ctime = inode->i_ctime.tv_sec; 1615 inarg.ctimensec = inode->i_ctime.tv_nsec; 1616 } 1617 if (ff) { 1618 inarg.valid |= FATTR_FH; 1619 inarg.fh = ff->fh; 1620 } 1621 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 1622 1623 return fuse_simple_request(fm, &args); 1624 } 1625 1626 /* 1627 * Set attributes, and at the same time refresh them. 1628 * 1629 * Truncation is slightly complicated, because the 'truncate' request 1630 * may fail, in which case we don't want to touch the mapping. 1631 * vmtruncate() doesn't allow for this case, so do the rlimit checking 1632 * and the actual truncation by hand. 1633 */ 1634 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, 1635 struct file *file) 1636 { 1637 struct inode *inode = d_inode(dentry); 1638 struct fuse_mount *fm = get_fuse_mount(inode); 1639 struct fuse_conn *fc = fm->fc; 1640 struct fuse_inode *fi = get_fuse_inode(inode); 1641 struct address_space *mapping = inode->i_mapping; 1642 FUSE_ARGS(args); 1643 struct fuse_setattr_in inarg; 1644 struct fuse_attr_out outarg; 1645 bool is_truncate = false; 1646 bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); 1647 loff_t oldsize; 1648 int err; 1649 bool trust_local_cmtime = is_wb; 1650 bool fault_blocked = false; 1651 1652 if (!fc->default_permissions) 1653 attr->ia_valid |= ATTR_FORCE; 1654 1655 err = setattr_prepare(&init_user_ns, dentry, attr); 1656 if (err) 1657 return err; 1658 1659 if (attr->ia_valid & ATTR_SIZE) { 1660 if (WARN_ON(!S_ISREG(inode->i_mode))) 1661 return -EIO; 1662 is_truncate = true; 1663 } 1664 1665 if (FUSE_IS_DAX(inode) && is_truncate) { 1666 filemap_invalidate_lock(mapping); 1667 fault_blocked = true; 1668 err = fuse_dax_break_layouts(inode, 0, 0); 1669 if (err) { 1670 filemap_invalidate_unlock(mapping); 1671 return err; 1672 } 1673 } 1674 1675 if (attr->ia_valid & ATTR_OPEN) { 1676 /* This is coming from open(..., ... | O_TRUNC); */ 1677 WARN_ON(!(attr->ia_valid & ATTR_SIZE)); 1678 WARN_ON(attr->ia_size != 0); 1679 if (fc->atomic_o_trunc) { 1680 /* 1681 * No need to send request to userspace, since actual 1682 * truncation has already been done by OPEN. But still 1683 * need to truncate page cache. 1684 */ 1685 i_size_write(inode, 0); 1686 truncate_pagecache(inode, 0); 1687 goto out; 1688 } 1689 file = NULL; 1690 } 1691 1692 /* Flush dirty data/metadata before non-truncate SETATTR */ 1693 if (is_wb && 1694 attr->ia_valid & 1695 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | 1696 ATTR_TIMES_SET)) { 1697 err = write_inode_now(inode, true); 1698 if (err) 1699 return err; 1700 1701 fuse_set_nowrite(inode); 1702 fuse_release_nowrite(inode); 1703 } 1704 1705 if (is_truncate) { 1706 fuse_set_nowrite(inode); 1707 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1708 if (trust_local_cmtime && attr->ia_size != inode->i_size) 1709 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; 1710 } 1711 1712 memset(&inarg, 0, sizeof(inarg)); 1713 memset(&outarg, 0, sizeof(outarg)); 1714 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); 1715 if (file) { 1716 struct fuse_file *ff = file->private_data; 1717 inarg.valid |= FATTR_FH; 1718 inarg.fh = ff->fh; 1719 } 1720 1721 /* Kill suid/sgid for non-directory chown unconditionally */ 1722 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) && 1723 attr->ia_valid & (ATTR_UID | ATTR_GID)) 1724 inarg.valid |= FATTR_KILL_SUIDGID; 1725 1726 if (attr->ia_valid & ATTR_SIZE) { 1727 /* For mandatory locking in truncate */ 1728 inarg.valid |= FATTR_LOCKOWNER; 1729 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1730 1731 /* Kill suid/sgid for truncate only if no CAP_FSETID */ 1732 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID)) 1733 inarg.valid |= FATTR_KILL_SUIDGID; 1734 } 1735 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1736 err = fuse_simple_request(fm, &args); 1737 if (err) { 1738 if (err == -EINTR) 1739 fuse_invalidate_attr(inode); 1740 goto error; 1741 } 1742 1743 if (fuse_invalid_attr(&outarg.attr) || 1744 inode_wrong_type(inode, outarg.attr.mode)) { 1745 fuse_make_bad(inode); 1746 err = -EIO; 1747 goto error; 1748 } 1749 1750 spin_lock(&fi->lock); 1751 /* the kernel maintains i_mtime locally */ 1752 if (trust_local_cmtime) { 1753 if (attr->ia_valid & ATTR_MTIME) 1754 inode->i_mtime = attr->ia_mtime; 1755 if (attr->ia_valid & ATTR_CTIME) 1756 inode->i_ctime = attr->ia_ctime; 1757 /* FIXME: clear I_DIRTY_SYNC? */ 1758 } 1759 1760 fuse_change_attributes_common(inode, &outarg.attr, 1761 attr_timeout(&outarg), 1762 fuse_get_cache_mask(inode)); 1763 oldsize = inode->i_size; 1764 /* see the comment in fuse_change_attributes() */ 1765 if (!is_wb || is_truncate) 1766 i_size_write(inode, outarg.attr.size); 1767 1768 if (is_truncate) { 1769 /* NOTE: this may release/reacquire fi->lock */ 1770 __fuse_release_nowrite(inode); 1771 } 1772 spin_unlock(&fi->lock); 1773 1774 /* 1775 * Only call invalidate_inode_pages2() after removing 1776 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock. 1777 */ 1778 if ((is_truncate || !is_wb) && 1779 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { 1780 truncate_pagecache(inode, outarg.attr.size); 1781 invalidate_inode_pages2(mapping); 1782 } 1783 1784 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1785 out: 1786 if (fault_blocked) 1787 filemap_invalidate_unlock(mapping); 1788 1789 return 0; 1790 1791 error: 1792 if (is_truncate) 1793 fuse_release_nowrite(inode); 1794 1795 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1796 1797 if (fault_blocked) 1798 filemap_invalidate_unlock(mapping); 1799 return err; 1800 } 1801 1802 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry, 1803 struct iattr *attr) 1804 { 1805 struct inode *inode = d_inode(entry); 1806 struct fuse_conn *fc = get_fuse_conn(inode); 1807 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; 1808 int ret; 1809 1810 if (fuse_is_bad(inode)) 1811 return -EIO; 1812 1813 if (!fuse_allow_current_process(get_fuse_conn(inode))) 1814 return -EACCES; 1815 1816 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) { 1817 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | 1818 ATTR_MODE); 1819 1820 /* 1821 * The only sane way to reliably kill suid/sgid is to do it in 1822 * the userspace filesystem 1823 * 1824 * This should be done on write(), truncate() and chown(). 1825 */ 1826 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) { 1827 /* 1828 * ia_mode calculation may have used stale i_mode. 1829 * Refresh and recalculate. 1830 */ 1831 ret = fuse_do_getattr(inode, NULL, file); 1832 if (ret) 1833 return ret; 1834 1835 attr->ia_mode = inode->i_mode; 1836 if (inode->i_mode & S_ISUID) { 1837 attr->ia_valid |= ATTR_MODE; 1838 attr->ia_mode &= ~S_ISUID; 1839 } 1840 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1841 attr->ia_valid |= ATTR_MODE; 1842 attr->ia_mode &= ~S_ISGID; 1843 } 1844 } 1845 } 1846 if (!attr->ia_valid) 1847 return 0; 1848 1849 ret = fuse_do_setattr(entry, attr, file); 1850 if (!ret) { 1851 /* 1852 * If filesystem supports acls it may have updated acl xattrs in 1853 * the filesystem, so forget cached acls for the inode. 1854 */ 1855 if (fc->posix_acl) 1856 forget_all_cached_acls(inode); 1857 1858 /* Directory mode changed, may need to revalidate access */ 1859 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE)) 1860 fuse_invalidate_entry_cache(entry); 1861 } 1862 return ret; 1863 } 1864 1865 static int fuse_getattr(struct user_namespace *mnt_userns, 1866 const struct path *path, struct kstat *stat, 1867 u32 request_mask, unsigned int flags) 1868 { 1869 struct inode *inode = d_inode(path->dentry); 1870 struct fuse_conn *fc = get_fuse_conn(inode); 1871 1872 if (fuse_is_bad(inode)) 1873 return -EIO; 1874 1875 if (!fuse_allow_current_process(fc)) { 1876 if (!request_mask) { 1877 /* 1878 * If user explicitly requested *nothing* then don't 1879 * error out, but return st_dev only. 1880 */ 1881 stat->result_mask = 0; 1882 stat->dev = inode->i_sb->s_dev; 1883 return 0; 1884 } 1885 return -EACCES; 1886 } 1887 1888 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); 1889 } 1890 1891 static const struct inode_operations fuse_dir_inode_operations = { 1892 .lookup = fuse_lookup, 1893 .mkdir = fuse_mkdir, 1894 .symlink = fuse_symlink, 1895 .unlink = fuse_unlink, 1896 .rmdir = fuse_rmdir, 1897 .rename = fuse_rename2, 1898 .link = fuse_link, 1899 .setattr = fuse_setattr, 1900 .create = fuse_create, 1901 .atomic_open = fuse_atomic_open, 1902 .mknod = fuse_mknod, 1903 .permission = fuse_permission, 1904 .getattr = fuse_getattr, 1905 .listxattr = fuse_listxattr, 1906 .get_acl = fuse_get_acl, 1907 .set_acl = fuse_set_acl, 1908 .fileattr_get = fuse_fileattr_get, 1909 .fileattr_set = fuse_fileattr_set, 1910 }; 1911 1912 static const struct file_operations fuse_dir_operations = { 1913 .llseek = generic_file_llseek, 1914 .read = generic_read_dir, 1915 .iterate_shared = fuse_readdir, 1916 .open = fuse_dir_open, 1917 .release = fuse_dir_release, 1918 .fsync = fuse_dir_fsync, 1919 .unlocked_ioctl = fuse_dir_ioctl, 1920 .compat_ioctl = fuse_dir_compat_ioctl, 1921 }; 1922 1923 static const struct inode_operations fuse_common_inode_operations = { 1924 .setattr = fuse_setattr, 1925 .permission = fuse_permission, 1926 .getattr = fuse_getattr, 1927 .listxattr = fuse_listxattr, 1928 .get_acl = fuse_get_acl, 1929 .set_acl = fuse_set_acl, 1930 .fileattr_get = fuse_fileattr_get, 1931 .fileattr_set = fuse_fileattr_set, 1932 }; 1933 1934 static const struct inode_operations fuse_symlink_inode_operations = { 1935 .setattr = fuse_setattr, 1936 .get_link = fuse_get_link, 1937 .getattr = fuse_getattr, 1938 .listxattr = fuse_listxattr, 1939 }; 1940 1941 void fuse_init_common(struct inode *inode) 1942 { 1943 inode->i_op = &fuse_common_inode_operations; 1944 } 1945 1946 void fuse_init_dir(struct inode *inode) 1947 { 1948 struct fuse_inode *fi = get_fuse_inode(inode); 1949 1950 inode->i_op = &fuse_dir_inode_operations; 1951 inode->i_fop = &fuse_dir_operations; 1952 1953 spin_lock_init(&fi->rdc.lock); 1954 fi->rdc.cached = false; 1955 fi->rdc.size = 0; 1956 fi->rdc.pos = 0; 1957 fi->rdc.version = 0; 1958 } 1959 1960 static int fuse_symlink_readpage(struct file *null, struct page *page) 1961 { 1962 int err = fuse_readlink_page(page->mapping->host, page); 1963 1964 if (!err) 1965 SetPageUptodate(page); 1966 1967 unlock_page(page); 1968 1969 return err; 1970 } 1971 1972 static const struct address_space_operations fuse_symlink_aops = { 1973 .readpage = fuse_symlink_readpage, 1974 }; 1975 1976 void fuse_init_symlink(struct inode *inode) 1977 { 1978 inode->i_op = &fuse_symlink_inode_operations; 1979 inode->i_data.a_ops = &fuse_symlink_aops; 1980 inode_nohighmem(inode); 1981 } 1982