1 /* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7 */ 8 9 #include "fuse_i.h" 10 11 #include <linux/pagemap.h> 12 #include <linux/file.h> 13 #include <linux/fs_context.h> 14 #include <linux/sched.h> 15 #include <linux/namei.h> 16 #include <linux/slab.h> 17 #include <linux/xattr.h> 18 #include <linux/iversion.h> 19 #include <linux/posix_acl.h> 20 21 static void fuse_advise_use_readdirplus(struct inode *dir) 22 { 23 struct fuse_inode *fi = get_fuse_inode(dir); 24 25 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); 26 } 27 28 #if BITS_PER_LONG >= 64 29 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) 30 { 31 entry->d_fsdata = (void *) time; 32 } 33 34 static inline u64 fuse_dentry_time(const struct dentry *entry) 35 { 36 return (u64)entry->d_fsdata; 37 } 38 39 #else 40 union fuse_dentry { 41 u64 time; 42 struct rcu_head rcu; 43 }; 44 45 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) 46 { 47 ((union fuse_dentry *) dentry->d_fsdata)->time = time; 48 } 49 50 static inline u64 fuse_dentry_time(const struct dentry *entry) 51 { 52 return ((union fuse_dentry *) entry->d_fsdata)->time; 53 } 54 #endif 55 56 static void fuse_dentry_settime(struct dentry *dentry, u64 time) 57 { 58 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb); 59 bool delete = !time && fc->delete_stale; 60 /* 61 * Mess with DCACHE_OP_DELETE because dput() will be faster without it. 
62 * Don't care about races, either way it's just an optimization 63 */ 64 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) || 65 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) { 66 spin_lock(&dentry->d_lock); 67 if (!delete) 68 dentry->d_flags &= ~DCACHE_OP_DELETE; 69 else 70 dentry->d_flags |= DCACHE_OP_DELETE; 71 spin_unlock(&dentry->d_lock); 72 } 73 74 __fuse_dentry_settime(dentry, time); 75 } 76 77 /* 78 * FUSE caches dentries and attributes with separate timeout. The 79 * time in jiffies until the dentry/attributes are valid is stored in 80 * dentry->d_fsdata and fuse_inode->i_time respectively. 81 */ 82 83 /* 84 * Calculate the time in jiffies until a dentry/attributes are valid 85 */ 86 static u64 time_to_jiffies(u64 sec, u32 nsec) 87 { 88 if (sec || nsec) { 89 struct timespec64 ts = { 90 sec, 91 min_t(u32, nsec, NSEC_PER_SEC - 1) 92 }; 93 94 return get_jiffies_64() + timespec64_to_jiffies(&ts); 95 } else 96 return 0; 97 } 98 99 /* 100 * Set dentry and possibly attribute timeouts from the lookup/mk* 101 * replies 102 */ 103 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) 104 { 105 fuse_dentry_settime(entry, 106 time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); 107 } 108 109 static u64 attr_timeout(struct fuse_attr_out *o) 110 { 111 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 112 } 113 114 u64 entry_attr_timeout(struct fuse_entry_out *o) 115 { 116 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 117 } 118 119 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) 120 { 121 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); 122 } 123 124 /* 125 * Mark the attributes as stale, so that at the next call to 126 * ->getattr() they will be fetched from userspace 127 */ 128 void fuse_invalidate_attr(struct inode *inode) 129 { 130 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS); 131 } 132 133 static void fuse_dir_changed(struct inode *dir) 134 { 135 
fuse_invalidate_attr(dir); 136 inode_maybe_inc_iversion(dir, false); 137 } 138 139 /** 140 * Mark the attributes as stale due to an atime change. Avoid the invalidate if 141 * atime is not used. 142 */ 143 void fuse_invalidate_atime(struct inode *inode) 144 { 145 if (!IS_RDONLY(inode)) 146 fuse_invalidate_attr_mask(inode, STATX_ATIME); 147 } 148 149 /* 150 * Just mark the entry as stale, so that a next attempt to look it up 151 * will result in a new lookup call to userspace 152 * 153 * This is called when a dentry is about to become negative and the 154 * timeout is unknown (unlink, rmdir, rename and in some cases 155 * lookup) 156 */ 157 void fuse_invalidate_entry_cache(struct dentry *entry) 158 { 159 fuse_dentry_settime(entry, 0); 160 } 161 162 /* 163 * Same as fuse_invalidate_entry_cache(), but also try to remove the 164 * dentry from the hash 165 */ 166 static void fuse_invalidate_entry(struct dentry *entry) 167 { 168 d_invalidate(entry); 169 fuse_invalidate_entry_cache(entry); 170 } 171 172 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, 173 u64 nodeid, const struct qstr *name, 174 struct fuse_entry_out *outarg) 175 { 176 memset(outarg, 0, sizeof(struct fuse_entry_out)); 177 args->opcode = FUSE_LOOKUP; 178 args->nodeid = nodeid; 179 args->in_numargs = 1; 180 args->in_args[0].size = name->len + 1; 181 args->in_args[0].value = name->name; 182 args->out_numargs = 1; 183 args->out_args[0].size = sizeof(struct fuse_entry_out); 184 args->out_args[0].value = outarg; 185 } 186 187 /* 188 * Check whether the dentry is still valid 189 * 190 * If the entry validity timeout has expired and the dentry is 191 * positive, try to redo the lookup. If the lookup results in a 192 * different inode, then let the VFS invalidate the dentry and redo 193 * the lookup once more. If the lookup results in the same inode, 194 * then refresh the attributes, timeouts and mark the dentry valid. 
*/
/*
 * Returns 1 when the dentry can still be used and 0 when it is invalid.
 * A negative errno is returned when no verdict could be reached:
 * -ECHILD if LOOKUP_RCU is set and either a lookup request would be
 * needed or FUSE_I_INIT_RDPLUS is set on the inode, -ENOMEM if the
 * forget link cannot be allocated or the lookup request itself fails
 * with -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_mount *fm;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && fuse_is_bad(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
		/* Timeout expired, or the caller asked for revalidation */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* A request has to be sent from here on; bail out under RCU */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fm = get_fuse_mount(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request, handed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fm->fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fm, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			/*
			 * The reply names a different node, or its submount
			 * flag disagrees with the inode's automount state:
			 * queue a forget for the returned node and invalidate.
			 */
			if (outarg.nodeid != get_node_id(inode) ||
			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
				fuse_queue_forget(fm->fc, forget,
						  outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: count the successful lookup */
			spin_lock(&fi->lock);
			fi->nlookup++;
			spin_unlock(&fi->lock);
		}
		/* The forget link was not queued on this path, so free it */
		kfree(forget);
		if (ret == -ENOMEM)
			goto out;
		/* Any other error, bad attributes or a changed file type */
		if (ret || fuse_invalid_attr(&outarg.attr) ||
		    (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/* Still within the timeout: only handle FUSE_I_INIT_RDPLUS */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}

#if BITS_PER_LONG < 64
/*
 * Allocate the zeroed union fuse_dentry that d_fsdata points to when
 * BITS_PER_LONG < 64.  Returns 0, or -ENOMEM if the allocation fails.
 */
static int fuse_dentry_init(struct dentry *dentry)
{
	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);

	return dentry->d_fsdata ? 0 : -ENOMEM;
}
/* Free the union fuse_dentry behind d_fsdata via kfree_rcu(). */
static void fuse_dentry_release(struct dentry *dentry)
{
	union fuse_dentry *fd = dentry->d_fsdata;

	kfree_rcu(fd, rcu);
}
#endif

/*
 * ->d_delete: returns nonzero when the dentry's stored time is before
 * the current 64-bit jiffies value, i.e. its timeout has passed.
 */
static int fuse_dentry_delete(const struct dentry *dentry)
{
	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}

/*
 * Create a fuse_mount object with a new superblock (with path->dentry
 * as the root), and return that mount so it can be auto-mounted on
 * @path.
308 */ 309 static struct vfsmount *fuse_dentry_automount(struct path *path) 310 { 311 struct fs_context *fsc; 312 struct fuse_mount *parent_fm = get_fuse_mount_super(path->mnt->mnt_sb); 313 struct fuse_conn *fc = parent_fm->fc; 314 struct fuse_mount *fm; 315 struct vfsmount *mnt; 316 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry)); 317 struct super_block *sb; 318 int err; 319 320 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); 321 if (IS_ERR(fsc)) { 322 err = PTR_ERR(fsc); 323 goto out; 324 } 325 326 err = -ENOMEM; 327 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 328 if (!fm) 329 goto out_put_fsc; 330 331 fsc->s_fs_info = fm; 332 sb = sget_fc(fsc, NULL, set_anon_super_fc); 333 if (IS_ERR(sb)) { 334 err = PTR_ERR(sb); 335 kfree(fm); 336 goto out_put_fsc; 337 } 338 fm->fc = fuse_conn_get(fc); 339 340 /* Initialize superblock, making @mp_fi its root */ 341 err = fuse_fill_super_submount(sb, mp_fi); 342 if (err) 343 goto out_put_sb; 344 345 sb->s_flags |= SB_ACTIVE; 346 fsc->root = dget(sb->s_root); 347 /* We are done configuring the superblock, so unlock it */ 348 up_write(&sb->s_umount); 349 350 down_write(&fc->killsb); 351 list_add_tail(&fm->fc_entry, &fc->mounts); 352 up_write(&fc->killsb); 353 354 /* Create the submount */ 355 mnt = vfs_create_mount(fsc); 356 if (IS_ERR(mnt)) { 357 err = PTR_ERR(mnt); 358 goto out_put_fsc; 359 } 360 mntget(mnt); 361 put_fs_context(fsc); 362 return mnt; 363 364 out_put_sb: 365 /* 366 * Only jump here when fsc->root is NULL and sb is still locked 367 * (otherwise put_fs_context() will put the superblock) 368 */ 369 deactivate_locked_super(sb); 370 out_put_fsc: 371 put_fs_context(fsc); 372 out: 373 return ERR_PTR(err); 374 } 375 376 const struct dentry_operations fuse_dentry_operations = { 377 .d_revalidate = fuse_dentry_revalidate, 378 .d_delete = fuse_dentry_delete, 379 #if BITS_PER_LONG < 64 380 .d_init = fuse_dentry_init, 381 .d_release = fuse_dentry_release, 382 #endif 383 
.d_automount = fuse_dentry_automount, 384 }; 385 386 const struct dentry_operations fuse_root_dentry_operations = { 387 #if BITS_PER_LONG < 64 388 .d_init = fuse_dentry_init, 389 .d_release = fuse_dentry_release, 390 #endif 391 }; 392 393 int fuse_valid_type(int m) 394 { 395 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || 396 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); 397 } 398 399 bool fuse_invalid_attr(struct fuse_attr *attr) 400 { 401 return !fuse_valid_type(attr->mode) || 402 attr->size > LLONG_MAX; 403 } 404 405 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, 406 struct fuse_entry_out *outarg, struct inode **inode) 407 { 408 struct fuse_mount *fm = get_fuse_mount_super(sb); 409 FUSE_ARGS(args); 410 struct fuse_forget_link *forget; 411 u64 attr_version; 412 int err; 413 414 *inode = NULL; 415 err = -ENAMETOOLONG; 416 if (name->len > FUSE_NAME_MAX) 417 goto out; 418 419 420 forget = fuse_alloc_forget(); 421 err = -ENOMEM; 422 if (!forget) 423 goto out; 424 425 attr_version = fuse_get_attr_version(fm->fc); 426 427 fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); 428 err = fuse_simple_request(fm, &args); 429 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 430 if (err || !outarg->nodeid) 431 goto out_put_forget; 432 433 err = -EIO; 434 if (!outarg->nodeid) 435 goto out_put_forget; 436 if (fuse_invalid_attr(&outarg->attr)) 437 goto out_put_forget; 438 439 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, 440 &outarg->attr, entry_attr_timeout(outarg), 441 attr_version); 442 err = -ENOMEM; 443 if (!*inode) { 444 fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); 445 goto out; 446 } 447 err = 0; 448 449 out_put_forget: 450 kfree(forget); 451 out: 452 return err; 453 } 454 455 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 456 unsigned int flags) 457 { 458 int err; 459 struct fuse_entry_out outarg; 460 struct inode *inode; 461 struct dentry *newent; 462 bool 
outarg_valid = true; 463 bool locked; 464 465 if (fuse_is_bad(dir)) 466 return ERR_PTR(-EIO); 467 468 locked = fuse_lock_inode(dir); 469 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, 470 &outarg, &inode); 471 fuse_unlock_inode(dir, locked); 472 if (err == -ENOENT) { 473 outarg_valid = false; 474 err = 0; 475 } 476 if (err) 477 goto out_err; 478 479 err = -EIO; 480 if (inode && get_node_id(inode) == FUSE_ROOT_ID) 481 goto out_iput; 482 483 newent = d_splice_alias(inode, entry); 484 err = PTR_ERR(newent); 485 if (IS_ERR(newent)) 486 goto out_err; 487 488 entry = newent ? newent : entry; 489 if (outarg_valid) 490 fuse_change_entry_timeout(entry, &outarg); 491 else 492 fuse_invalidate_entry_cache(entry); 493 494 if (inode) 495 fuse_advise_use_readdirplus(dir); 496 return newent; 497 498 out_iput: 499 iput(inode); 500 out_err: 501 return ERR_PTR(err); 502 } 503 504 /* 505 * Atomic create+open operation 506 * 507 * If the filesystem doesn't support this, then fall back to separate 508 * 'mknod' + 'open' requests. 
*/
/*
 * Sends FUSE_CREATE for @entry in @dir, instantiates the dentry with the
 * returned inode and finishes the open on @file.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation
 * failure, -EIO when the reply does not describe a valid regular file,
 * or the error of the request / finish_open().
 */
static int fuse_create_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct inode *inode;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
	struct fuse_inode *fi;
	struct fuse_file *ff;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fm);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();

	if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
	}

	/* in: create parameters + name; out: entry reply + open reply */
	args.opcode = FUSE_CREATE;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	args.out_args[1].size = sizeof(outopen);
	args.out_args[1].value = &outopen;
	err = fuse_simple_request(fm, &args);
	if (err)
		goto out_free_ff;

	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
	    fuse_invalid_attr(&outentry.attr))
		goto out_free_ff;

	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
	if (!inode) {
		/*
		 * No inode: release the file handle and queue a forget for
		 * the node.  Neither ff nor forget is freed on this path
		 * (out_err skips both cleanup labels).
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(NULL, ff, flags);
		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	fuse_dir_changed(dir);
	err = finish_open(file, entry, generic_file_open);
	if (err) {
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
	} else {
		file->private_data = ff;
		fuse_finish_open(inode, file);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}

static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
		      umode_t, dev_t);
/*
 * Looks the entry up first if it is still in lookup.  Without O_CREAT,
 * or when the entry is positive, the open is left to the caller via
 * finish_no_open().  Otherwise fuse_create_open() is tried; if the
 * connection has no_create set or the request returns -ENOSYS (which
 * sets no_create), the file is created with fuse_mknod() instead.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

	if (fuse_is_bad(dir))
		return -EIO;

	if (d_in_lookup(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry the lookup returned, if any */
		if (res)
			entry = res;
	}

	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	file->f_mode |= FMODE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(dir, entry, file, flags, mode);
	if (err == -ENOSYS) {
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	dput(res);
	return err;

mknod:
	err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	return finish_no_open(file, res);
}

/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller fills the opcode and input arguments of @args; this
 * function sets the node id of @dir and the single fuse_entry_out
 * output argument, sends the request and splices the resulting inode
 * onto @entry.  @mode is only compared by file type (S_IFMT) against
 * the returned attributes.
 *
 * Returns 0 or a negative errno (-EIO for a bad directory or an invalid
 * or mismatching reply, -ENOMEM on allocation failure, or the error of
 * the request / d_splice_alias()).
 */
static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
			    struct inode *dir, struct dentry *entry,
			    umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *d;
	int err;
	struct fuse_forget_link *forget;

	if (fuse_is_bad(dir))
		return -EIO;

	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	memset(&outarg, 0, sizeof(outarg));
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;
	err = fuse_simple_request(fm, args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
		goto out_put_forget_req;

	/* The created object must have the requested file type */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, entry_attr_timeout(&outarg), 0);
	if (!inode) {
		/* The forget link is handed over to the queue, not freed */
		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
		return -ENOMEM;
	}
	kfree(forget);

	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return PTR_ERR(d);

	/* Set the timeout on whichever dentry now refers to the inode */
	if (d) {
		fuse_change_entry_timeout(d, &outarg);
		dput(d);
	} else {
		fuse_change_entry_timeout(entry, &outarg);
	}
	fuse_dir_changed(dir);
	return 0;

 out_put_forget_req:
	kfree(forget);
	return err;
}

/*
 * Create a node with FUSE_MKNOD.  The umask is applied to @mode here
 * unless the connection has dont_mask set; the current umask is also
 * sent in the request.  @mnt_userns is not used.
 */
static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mknod_in inarg;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.rdev = new_encode_dev(rdev);
	inarg.umask = current_umask();
	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	return create_new_entry(fm, &args, dir, entry, mode);
}

/*
 * Create a file by calling fuse_mknod() with a zero rdev.  @mnt_userns
 * and @excl are not used; &init_user_ns is passed on instead.
 */
static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
}

/*
 * Create a directory with FUSE_MKDIR.  Umask handling is the same as in
 * fuse_mknod(); the reply must describe an S_IFDIR object.
 */
static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
		      struct dentry *entry, umode_t mode)
{
	struct fuse_mkdir_in inarg;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.umask = current_umask();
	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
}

/*
 * Create a symlink with FUSE_SYMLINK.  The first input argument is the
 * entry name, the second the NUL-terminated target @link.
 */
static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
			struct dentry *entry, const char *link)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	unsigned len = strlen(link) + 1;
	FUSE_ARGS(args);

	args.opcode = FUSE_SYMLINK;
	args.in_numargs = 2;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
	args.in_args[1].size = len;
	args.in_args[1].value = link;
	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
}

/*
 * Set i_ctime to the current time and mark the inode dirty, unless
 * IS_NOCMTIME() is true for the inode.
 */
void fuse_update_ctime(struct inode *inode)
{
	if (!IS_NOCMTIME(inode)) {
		inode->i_ctime = current_time(inode);
		mark_inode_dirty_sync(inode);
	}
}

/*
 * Remove @entry from @dir with FUSE_UNLINK.  On success the link count
 * is dropped and the cached state of inode, directory and dentry is
 * invalidated; on -EINTR the dentry is invalidated.
 */
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);

	if (fuse_is_bad(dir))
		return -EIO;

	args.opcode = FUSE_UNLINK;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
	err = fuse_simple_request(fm, &args);
	if (!err) {
		struct inode *inode = d_inode(entry);
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fi->lock);
		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
		/*
		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
		 * happen if userspace filesystem is careless.  It would be
		 * difficult to enforce correct nlink usage so just ignore this
		 * condition here
		 */
		if (inode->i_nlink > 0)
			drop_nlink(inode);
		spin_unlock(&fi->lock);
		fuse_invalidate_attr(inode);
		fuse_dir_changed(dir);
		fuse_invalidate_entry_cache(entry);
		fuse_update_ctime(inode);
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

/*
 * Remove directory @entry from @dir with FUSE_RMDIR.  On success the
 * link count is cleared and directory and dentry are invalidated; on
 * -EINTR the dentry is invalidated.
 */
static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_mount *fm = get_fuse_mount(dir);
	FUSE_ARGS(args);

	if (fuse_is_bad(dir))
		return -EIO;

	args.opcode = FUSE_RMDIR;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
	err = fuse_simple_request(fm, &args);
	if (!err) {
		clear_nlink(d_inode(entry));
		fuse_dir_changed(dir);
		fuse_invalidate_entry_cache(entry);
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

/*
 * Send a rename request with the given @opcode.  @argsize is the number
 * of bytes of the local struct fuse_rename2_in that are zeroed and sent
 * as the first input argument; the old and the new name follow.
 */
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_mount *fm = get_fuse_mount(olddir);
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	err = fuse_simple_request(fm, &args);
	if (!err) {
		/* ctime changes */
		fuse_invalidate_attr(d_inode(oldent));
		fuse_update_ctime(d_inode(oldent));

		if (flags & RENAME_EXCHANGE) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_update_ctime(d_inode(newent));
		}

		fuse_dir_changed(olddir);
		if (olddir != newdir)
			fuse_dir_changed(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_invalidate_entry_cache(newent);
			fuse_update_ctime(d_inode(newent));
		}
	} else if (err == -EINTR) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place.  If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem.  Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}

/*
 * Rename entry point.  Flags other than RENAME_NOREPLACE,
 * RENAME_EXCHANGE and RENAME_WHITEOUT give -EINVAL.  With any flag set
 * FUSE_RENAME2 is used, which needs protocol minor >= 23 and no_rename2
 * unset (-EINVAL otherwise); -ENOSYS from the request sets no_rename2
 * and is turned into -EINVAL.  Without flags FUSE_RENAME is used.
 */
static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
			struct dentry *oldent, struct inode *newdir,
			struct dentry *newent, unsigned int flags)
{
	struct fuse_conn *fc = get_fuse_conn(olddir);
	int err;

	if (fuse_is_bad(olddir))
		return -EIO;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags) {
		if (fc->no_rename2 || fc->minor < 23)
			return -EINVAL;

		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
					 FUSE_RENAME2,
					 sizeof(struct fuse_rename2_in));
		if (err == -ENOSYS) {
			fc->no_rename2 = 1;
			err = -EINVAL;
		}
	} else {
		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
					 FUSE_RENAME,
					 sizeof(struct fuse_rename_in));
	}

	return err;
}

/*
 * Create hard link @newent in @newdir to the inode of @entry using
 * FUSE_LINK.  On success the link count is incremented (unless it is
 * already UINT_MAX) and the attributes are invalidated; on -EINTR only
 * the attributes are invalidated.
 */
static int fuse_link(struct dentry *entry, struct inode *newdir,
		     struct dentry *newent)
{
	int err;
	struct fuse_link_in inarg;
	struct inode *inode = d_inode(entry);
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.oldnodeid = get_node_id(inode);
	args.opcode = FUSE_LINK;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = newent->d_name.len + 1;
	args.in_args[1].value = newent->d_name.name;
	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
	/* Contrary to "normal" filesystems it can happen that link
	   makes two "logical" inodes point to the same "physical"
	   inode.  We invalidate the attributes of the old one, so it
	   will reflect changes in the backing inode (link count,
	   etc.)
	*/
	if (!err) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fi->lock);
		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
		if (likely(inode->i_nlink < UINT_MAX))
			inc_nlink(inode);
		spin_unlock(&fi->lock);
		fuse_invalidate_attr(inode);
		fuse_update_ctime(inode);
	} else if (err == -EINTR) {
		fuse_invalidate_attr(inode);
	}
	return err;
}

/*
 * Fill @stat from the attributes in @attr.  The file type bits, device
 * and rdev come from @inode; uid/gid are mapped through fc->user_ns.
 * Note that @attr itself is modified when the connection uses
 * writeback_cache and the inode is a regular file.
 */
static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
			  struct kstat *stat)
{
	unsigned int blkbits;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see the comment in fuse_change_attributes() */
	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
		attr->size = i_size_read(inode);
		attr->mtime = inode->i_mtime.tv_sec;
		attr->mtimensec = inode->i_mtime.tv_nsec;
		attr->ctime = inode->i_ctime.tv_sec;
		attr->ctimensec = inode->i_ctime.tv_nsec;
	}

	stat->dev = inode->i_sb->s_dev;
	stat->ino = attr->ino;
	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	stat->nlink = attr->nlink;
	stat->uid = make_kuid(fc->user_ns, attr->uid);
	stat->gid = make_kgid(fc->user_ns, attr->gid);
	stat->rdev = inode->i_rdev;
	stat->atime.tv_sec = attr->atime;
	stat->atime.tv_nsec = attr->atimensec;
	stat->mtime.tv_sec = attr->mtime;
	stat->mtime.tv_nsec = attr->mtimensec;
	stat->ctime.tv_sec = attr->ctime;
	stat->ctime.tv_nsec = attr->ctimensec;
	stat->size = attr->size;
	stat->blocks = attr->blocks;

	/* Use the superblock's block size when the reply carries none */
	if (attr->blksize != 0)
		blkbits = ilog2(attr->blksize);
	else
		blkbits = inode->i_sb->s_blocksize_bits;

	stat->blksize = 1 << blkbits;
}

/*
 * Send FUSE_GETATTR for @inode, update the cached attributes and, if
 * @stat is not NULL, fill it in.  A reply with invalid attributes or a
 * different file type marks the inode bad and returns -EIO.
 */
static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
			   struct file *file)
{
	int err;
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
	struct fuse_mount *fm = get_fuse_mount(inode);
	FUSE_ARGS(args);
	u64 attr_version;

	/* Sampled before the request, handed to fuse_change_attributes() */
	attr_version = fuse_get_attr_version(fm->fc);

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	/* Directories have separate file-handle space */
	if (file && S_ISREG(inode->i_mode)) {
		struct fuse_file *ff = file->private_data;

		inarg.getattr_flags |= FUSE_GETATTR_FH;
		inarg.fh = ff->fh;
	}
	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err) {
		if (fuse_invalid_attr(&outarg.attr) ||
		    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
			fuse_make_bad(inode);
			err = -EIO;
		} else {
			fuse_change_attributes(inode, &outarg.attr,
					       attr_timeout(&outarg),
					       attr_version);
			if (stat)
				fuse_fillattr(inode, &outarg.attr, stat);
		}
	}
	return err;
}

/*
 * Refresh the attributes if needed and optionally fill @stat.
 *
 * A FUSE_GETATTR is sent when AT_STATX_FORCE_SYNC is set; it is skipped
 * when AT_STATX_DONT_SYNC is set.  Otherwise it is sent if any bit of
 * @request_mask is in the inode's inval_mask or if fi->i_time is before
 * the current jiffies.  Without a request, @stat (if given) is filled
 * from the cached inode.
 */
static int fuse_update_get_attr(struct inode *inode, struct file *file,
				struct kstat *stat, u32 request_mask,
				unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err = 0;
	bool sync;

	if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & READ_ONCE(fi->inval_mask))
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		err = fuse_do_getattr(inode, stat, file);
	} else if (stat) {
		generic_fillattr(&init_user_ns, inode, stat);
		/* Report the mode and inode number saved in the fuse_inode */
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
	}

	return err;
}

/*
 * Refresh the cached attributes of @inode if they are stale, without
 * filling a kstat.
 */
int fuse_update_attributes(struct inode *inode, struct file *file)
{
	/* Do *not* need to get atime for internal purposes */
	return fuse_update_get_attr(inode, file, NULL,
				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
}

/*
 * Invalidate the dentry @name in the directory with node id
 * @parent_nodeid.  If @child_nodeid is non-zero and the dentry is
 * positive, the dentry is additionally deleted, provided that it refers
 * to that node, is not a mountpoint and (for a directory) is empty.
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias of it or
 * the dentry cannot be found or the child node id does not match,
 * -ENOTDIR if the parent is not a directory, -EBUSY for a mountpoint
 * and -ENOTEMPTY for a non-empty directory.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	/* d_lookup() needs the hash of the name, computed for this parent */
	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* Delete the dentry only if every check above passed */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}

/*
 * Calling into a user-controlled filesystem gives the filesystem
 * daemon ptrace-like capabilities over the current process.  This
 * means, that the filesystem daemon is able to record the exact
 * filesystem operations performed, and can also control the behavior
 * of the requester process in otherwise impossible ways.
For example 1179 * it can delay the operation for arbitrary length of time allowing 1180 * DoS against the requester. 1181 * 1182 * For this reason only those processes can call into the filesystem, 1183 * for which the owner of the mount has ptrace privilege. This 1184 * excludes processes started by other users, suid or sgid processes. 1185 */ 1186 int fuse_allow_current_process(struct fuse_conn *fc) 1187 { 1188 const struct cred *cred; 1189 1190 if (fc->allow_other) 1191 return current_in_userns(fc->user_ns); 1192 1193 cred = current_cred(); 1194 if (uid_eq(cred->euid, fc->user_id) && 1195 uid_eq(cred->suid, fc->user_id) && 1196 uid_eq(cred->uid, fc->user_id) && 1197 gid_eq(cred->egid, fc->group_id) && 1198 gid_eq(cred->sgid, fc->group_id) && 1199 gid_eq(cred->gid, fc->group_id)) 1200 return 1; 1201 1202 return 0; 1203 } 1204 1205 static int fuse_access(struct inode *inode, int mask) 1206 { 1207 struct fuse_mount *fm = get_fuse_mount(inode); 1208 FUSE_ARGS(args); 1209 struct fuse_access_in inarg; 1210 int err; 1211 1212 BUG_ON(mask & MAY_NOT_BLOCK); 1213 1214 if (fm->fc->no_access) 1215 return 0; 1216 1217 memset(&inarg, 0, sizeof(inarg)); 1218 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); 1219 args.opcode = FUSE_ACCESS; 1220 args.nodeid = get_node_id(inode); 1221 args.in_numargs = 1; 1222 args.in_args[0].size = sizeof(inarg); 1223 args.in_args[0].value = &inarg; 1224 err = fuse_simple_request(fm, &args); 1225 if (err == -ENOSYS) { 1226 fm->fc->no_access = 1; 1227 err = 0; 1228 } 1229 return err; 1230 } 1231 1232 static int fuse_perm_getattr(struct inode *inode, int mask) 1233 { 1234 if (mask & MAY_NOT_BLOCK) 1235 return -ECHILD; 1236 1237 forget_all_cached_acls(inode); 1238 return fuse_do_getattr(inode, NULL, NULL); 1239 } 1240 1241 /* 1242 * Check permission. The two basic access models of FUSE are: 1243 * 1244 * 1) Local access checking ('default_permissions' mount option) based 1245 * on file mode. 
This is the plain old disk filesystem permission 1246 * modell. 1247 * 1248 * 2) "Remote" access checking, where server is responsible for 1249 * checking permission in each inode operation. An exception to this 1250 * is if ->permission() was invoked from sys_access() in which case an 1251 * access request is sent. Execute permission is still checked 1252 * locally based on file mode. 1253 */ 1254 static int fuse_permission(struct user_namespace *mnt_userns, 1255 struct inode *inode, int mask) 1256 { 1257 struct fuse_conn *fc = get_fuse_conn(inode); 1258 bool refreshed = false; 1259 int err = 0; 1260 1261 if (fuse_is_bad(inode)) 1262 return -EIO; 1263 1264 if (!fuse_allow_current_process(fc)) 1265 return -EACCES; 1266 1267 /* 1268 * If attributes are needed, refresh them before proceeding 1269 */ 1270 if (fc->default_permissions || 1271 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1272 struct fuse_inode *fi = get_fuse_inode(inode); 1273 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; 1274 1275 if (perm_mask & READ_ONCE(fi->inval_mask) || 1276 time_before64(fi->i_time, get_jiffies_64())) { 1277 refreshed = true; 1278 1279 err = fuse_perm_getattr(inode, mask); 1280 if (err) 1281 return err; 1282 } 1283 } 1284 1285 if (fc->default_permissions) { 1286 err = generic_permission(&init_user_ns, inode, mask); 1287 1288 /* If permission is denied, try to refresh file 1289 attributes. This is also needed, because the root 1290 node will at first have no permissions */ 1291 if (err == -EACCES && !refreshed) { 1292 err = fuse_perm_getattr(inode, mask); 1293 if (!err) 1294 err = generic_permission(&init_user_ns, 1295 inode, mask); 1296 } 1297 1298 /* Note: the opposite of the above test does not 1299 exist. 
So if permissions are revoked this won't be 1300 noticed immediately, only after the attribute 1301 timeout has expired */ 1302 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1303 err = fuse_access(inode, mask); 1304 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1305 if (!(inode->i_mode & S_IXUGO)) { 1306 if (refreshed) 1307 return -EACCES; 1308 1309 err = fuse_perm_getattr(inode, mask); 1310 if (!err && !(inode->i_mode & S_IXUGO)) 1311 return -EACCES; 1312 } 1313 } 1314 return err; 1315 } 1316 1317 static int fuse_readlink_page(struct inode *inode, struct page *page) 1318 { 1319 struct fuse_mount *fm = get_fuse_mount(inode); 1320 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 }; 1321 struct fuse_args_pages ap = { 1322 .num_pages = 1, 1323 .pages = &page, 1324 .descs = &desc, 1325 }; 1326 char *link; 1327 ssize_t res; 1328 1329 ap.args.opcode = FUSE_READLINK; 1330 ap.args.nodeid = get_node_id(inode); 1331 ap.args.out_pages = true; 1332 ap.args.out_argvar = true; 1333 ap.args.page_zeroing = true; 1334 ap.args.out_numargs = 1; 1335 ap.args.out_args[0].size = desc.length; 1336 res = fuse_simple_request(fm, &ap.args); 1337 1338 fuse_invalidate_atime(inode); 1339 1340 if (res < 0) 1341 return res; 1342 1343 if (WARN_ON(res >= PAGE_SIZE)) 1344 return -EIO; 1345 1346 link = page_address(page); 1347 link[res] = '\0'; 1348 1349 return 0; 1350 } 1351 1352 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, 1353 struct delayed_call *callback) 1354 { 1355 struct fuse_conn *fc = get_fuse_conn(inode); 1356 struct page *page; 1357 int err; 1358 1359 err = -EIO; 1360 if (fuse_is_bad(inode)) 1361 goto out_err; 1362 1363 if (fc->cache_symlinks) 1364 return page_get_link(dentry, inode, callback); 1365 1366 err = -ECHILD; 1367 if (!dentry) 1368 goto out_err; 1369 1370 page = alloc_page(GFP_KERNEL); 1371 err = -ENOMEM; 1372 if (!page) 1373 goto out_err; 1374 1375 err = fuse_readlink_page(inode, page); 1376 if (err) { 1377 __free_page(page); 
1378 goto out_err; 1379 } 1380 1381 set_delayed_call(callback, page_put_link, page); 1382 1383 return page_address(page); 1384 1385 out_err: 1386 return ERR_PTR(err); 1387 } 1388 1389 static int fuse_dir_open(struct inode *inode, struct file *file) 1390 { 1391 return fuse_open_common(inode, file, true); 1392 } 1393 1394 static int fuse_dir_release(struct inode *inode, struct file *file) 1395 { 1396 fuse_release_common(file, true); 1397 1398 return 0; 1399 } 1400 1401 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, 1402 int datasync) 1403 { 1404 struct inode *inode = file->f_mapping->host; 1405 struct fuse_conn *fc = get_fuse_conn(inode); 1406 int err; 1407 1408 if (fuse_is_bad(inode)) 1409 return -EIO; 1410 1411 if (fc->no_fsyncdir) 1412 return 0; 1413 1414 inode_lock(inode); 1415 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR); 1416 if (err == -ENOSYS) { 1417 fc->no_fsyncdir = 1; 1418 err = 0; 1419 } 1420 inode_unlock(inode); 1421 1422 return err; 1423 } 1424 1425 static long fuse_dir_ioctl(struct file *file, unsigned int cmd, 1426 unsigned long arg) 1427 { 1428 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1429 1430 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ 1431 if (fc->minor < 18) 1432 return -ENOTTY; 1433 1434 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); 1435 } 1436 1437 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, 1438 unsigned long arg) 1439 { 1440 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); 1441 1442 if (fc->minor < 18) 1443 return -ENOTTY; 1444 1445 return fuse_ioctl_common(file, cmd, arg, 1446 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); 1447 } 1448 1449 static bool update_mtime(unsigned ivalid, bool trust_local_mtime) 1450 { 1451 /* Always update if mtime is explicitly set */ 1452 if (ivalid & ATTR_MTIME_SET) 1453 return true; 1454 1455 /* Or if kernel i_mtime is the official one */ 1456 if (trust_local_mtime) 1457 return true; 1458 
1459 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ 1460 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) 1461 return false; 1462 1463 /* In all other cases update */ 1464 return true; 1465 } 1466 1467 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, 1468 struct fuse_setattr_in *arg, bool trust_local_cmtime) 1469 { 1470 unsigned ivalid = iattr->ia_valid; 1471 1472 if (ivalid & ATTR_MODE) 1473 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1474 if (ivalid & ATTR_UID) 1475 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); 1476 if (ivalid & ATTR_GID) 1477 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); 1478 if (ivalid & ATTR_SIZE) 1479 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1480 if (ivalid & ATTR_ATIME) { 1481 arg->valid |= FATTR_ATIME; 1482 arg->atime = iattr->ia_atime.tv_sec; 1483 arg->atimensec = iattr->ia_atime.tv_nsec; 1484 if (!(ivalid & ATTR_ATIME_SET)) 1485 arg->valid |= FATTR_ATIME_NOW; 1486 } 1487 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { 1488 arg->valid |= FATTR_MTIME; 1489 arg->mtime = iattr->ia_mtime.tv_sec; 1490 arg->mtimensec = iattr->ia_mtime.tv_nsec; 1491 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) 1492 arg->valid |= FATTR_MTIME_NOW; 1493 } 1494 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { 1495 arg->valid |= FATTR_CTIME; 1496 arg->ctime = iattr->ia_ctime.tv_sec; 1497 arg->ctimensec = iattr->ia_ctime.tv_nsec; 1498 } 1499 } 1500 1501 /* 1502 * Prevent concurrent writepages on inode 1503 * 1504 * This is done by adding a negative bias to the inode write counter 1505 * and waiting for all pending writes to finish. 
1506 */ 1507 void fuse_set_nowrite(struct inode *inode) 1508 { 1509 struct fuse_inode *fi = get_fuse_inode(inode); 1510 1511 BUG_ON(!inode_is_locked(inode)); 1512 1513 spin_lock(&fi->lock); 1514 BUG_ON(fi->writectr < 0); 1515 fi->writectr += FUSE_NOWRITE; 1516 spin_unlock(&fi->lock); 1517 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); 1518 } 1519 1520 /* 1521 * Allow writepages on inode 1522 * 1523 * Remove the bias from the writecounter and send any queued 1524 * writepages. 1525 */ 1526 static void __fuse_release_nowrite(struct inode *inode) 1527 { 1528 struct fuse_inode *fi = get_fuse_inode(inode); 1529 1530 BUG_ON(fi->writectr != FUSE_NOWRITE); 1531 fi->writectr = 0; 1532 fuse_flush_writepages(inode); 1533 } 1534 1535 void fuse_release_nowrite(struct inode *inode) 1536 { 1537 struct fuse_inode *fi = get_fuse_inode(inode); 1538 1539 spin_lock(&fi->lock); 1540 __fuse_release_nowrite(inode); 1541 spin_unlock(&fi->lock); 1542 } 1543 1544 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, 1545 struct inode *inode, 1546 struct fuse_setattr_in *inarg_p, 1547 struct fuse_attr_out *outarg_p) 1548 { 1549 args->opcode = FUSE_SETATTR; 1550 args->nodeid = get_node_id(inode); 1551 args->in_numargs = 1; 1552 args->in_args[0].size = sizeof(*inarg_p); 1553 args->in_args[0].value = inarg_p; 1554 args->out_numargs = 1; 1555 args->out_args[0].size = sizeof(*outarg_p); 1556 args->out_args[0].value = outarg_p; 1557 } 1558 1559 /* 1560 * Flush inode->i_mtime to the server 1561 */ 1562 int fuse_flush_times(struct inode *inode, struct fuse_file *ff) 1563 { 1564 struct fuse_mount *fm = get_fuse_mount(inode); 1565 FUSE_ARGS(args); 1566 struct fuse_setattr_in inarg; 1567 struct fuse_attr_out outarg; 1568 1569 memset(&inarg, 0, sizeof(inarg)); 1570 memset(&outarg, 0, sizeof(outarg)); 1571 1572 inarg.valid = FATTR_MTIME; 1573 inarg.mtime = inode->i_mtime.tv_sec; 1574 inarg.mtimensec = inode->i_mtime.tv_nsec; 1575 if (fm->fc->minor >= 23) { 1576 
inarg.valid |= FATTR_CTIME; 1577 inarg.ctime = inode->i_ctime.tv_sec; 1578 inarg.ctimensec = inode->i_ctime.tv_nsec; 1579 } 1580 if (ff) { 1581 inarg.valid |= FATTR_FH; 1582 inarg.fh = ff->fh; 1583 } 1584 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg); 1585 1586 return fuse_simple_request(fm, &args); 1587 } 1588 1589 /* 1590 * Set attributes, and at the same time refresh them. 1591 * 1592 * Truncation is slightly complicated, because the 'truncate' request 1593 * may fail, in which case we don't want to touch the mapping. 1594 * vmtruncate() doesn't allow for this case, so do the rlimit checking 1595 * and the actual truncation by hand. 1596 */ 1597 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, 1598 struct file *file) 1599 { 1600 struct inode *inode = d_inode(dentry); 1601 struct fuse_mount *fm = get_fuse_mount(inode); 1602 struct fuse_conn *fc = fm->fc; 1603 struct fuse_inode *fi = get_fuse_inode(inode); 1604 FUSE_ARGS(args); 1605 struct fuse_setattr_in inarg; 1606 struct fuse_attr_out outarg; 1607 bool is_truncate = false; 1608 bool is_wb = fc->writeback_cache; 1609 loff_t oldsize; 1610 int err; 1611 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); 1612 bool fault_blocked = false; 1613 1614 if (!fc->default_permissions) 1615 attr->ia_valid |= ATTR_FORCE; 1616 1617 err = setattr_prepare(&init_user_ns, dentry, attr); 1618 if (err) 1619 return err; 1620 1621 if (attr->ia_valid & ATTR_SIZE) { 1622 if (WARN_ON(!S_ISREG(inode->i_mode))) 1623 return -EIO; 1624 is_truncate = true; 1625 } 1626 1627 if (FUSE_IS_DAX(inode) && is_truncate) { 1628 down_write(&fi->i_mmap_sem); 1629 fault_blocked = true; 1630 err = fuse_dax_break_layouts(inode, 0, 0); 1631 if (err) { 1632 up_write(&fi->i_mmap_sem); 1633 return err; 1634 } 1635 } 1636 1637 if (attr->ia_valid & ATTR_OPEN) { 1638 /* This is coming from open(..., ... 
| O_TRUNC); */ 1639 WARN_ON(!(attr->ia_valid & ATTR_SIZE)); 1640 WARN_ON(attr->ia_size != 0); 1641 if (fc->atomic_o_trunc) { 1642 /* 1643 * No need to send request to userspace, since actual 1644 * truncation has already been done by OPEN. But still 1645 * need to truncate page cache. 1646 */ 1647 i_size_write(inode, 0); 1648 truncate_pagecache(inode, 0); 1649 goto out; 1650 } 1651 file = NULL; 1652 } 1653 1654 /* Flush dirty data/metadata before non-truncate SETATTR */ 1655 if (is_wb && S_ISREG(inode->i_mode) && 1656 attr->ia_valid & 1657 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | 1658 ATTR_TIMES_SET)) { 1659 err = write_inode_now(inode, true); 1660 if (err) 1661 return err; 1662 1663 fuse_set_nowrite(inode); 1664 fuse_release_nowrite(inode); 1665 } 1666 1667 if (is_truncate) { 1668 fuse_set_nowrite(inode); 1669 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1670 if (trust_local_cmtime && attr->ia_size != inode->i_size) 1671 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; 1672 } 1673 1674 memset(&inarg, 0, sizeof(inarg)); 1675 memset(&outarg, 0, sizeof(outarg)); 1676 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); 1677 if (file) { 1678 struct fuse_file *ff = file->private_data; 1679 inarg.valid |= FATTR_FH; 1680 inarg.fh = ff->fh; 1681 } 1682 1683 /* Kill suid/sgid for non-directory chown unconditionally */ 1684 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) && 1685 attr->ia_valid & (ATTR_UID | ATTR_GID)) 1686 inarg.valid |= FATTR_KILL_SUIDGID; 1687 1688 if (attr->ia_valid & ATTR_SIZE) { 1689 /* For mandatory locking in truncate */ 1690 inarg.valid |= FATTR_LOCKOWNER; 1691 inarg.lock_owner = fuse_lock_owner_id(fc, current->files); 1692 1693 /* Kill suid/sgid for truncate only if no CAP_FSETID */ 1694 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID)) 1695 inarg.valid |= FATTR_KILL_SUIDGID; 1696 } 1697 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); 1698 err = fuse_simple_request(fm, &args); 1699 if (err) { 1700 if (err == -EINTR) 1701 
fuse_invalidate_attr(inode); 1702 goto error; 1703 } 1704 1705 if (fuse_invalid_attr(&outarg.attr) || 1706 (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1707 fuse_make_bad(inode); 1708 err = -EIO; 1709 goto error; 1710 } 1711 1712 spin_lock(&fi->lock); 1713 /* the kernel maintains i_mtime locally */ 1714 if (trust_local_cmtime) { 1715 if (attr->ia_valid & ATTR_MTIME) 1716 inode->i_mtime = attr->ia_mtime; 1717 if (attr->ia_valid & ATTR_CTIME) 1718 inode->i_ctime = attr->ia_ctime; 1719 /* FIXME: clear I_DIRTY_SYNC? */ 1720 } 1721 1722 fuse_change_attributes_common(inode, &outarg.attr, 1723 attr_timeout(&outarg)); 1724 oldsize = inode->i_size; 1725 /* see the comment in fuse_change_attributes() */ 1726 if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) 1727 i_size_write(inode, outarg.attr.size); 1728 1729 if (is_truncate) { 1730 /* NOTE: this may release/reacquire fi->lock */ 1731 __fuse_release_nowrite(inode); 1732 } 1733 spin_unlock(&fi->lock); 1734 1735 /* 1736 * Only call invalidate_inode_pages2() after removing 1737 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. 1738 */ 1739 if ((is_truncate || !is_wb) && 1740 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { 1741 truncate_pagecache(inode, outarg.attr.size); 1742 invalidate_inode_pages2(inode->i_mapping); 1743 } 1744 1745 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1746 out: 1747 if (fault_blocked) 1748 up_write(&fi->i_mmap_sem); 1749 1750 return 0; 1751 1752 error: 1753 if (is_truncate) 1754 fuse_release_nowrite(inode); 1755 1756 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1757 1758 if (fault_blocked) 1759 up_write(&fi->i_mmap_sem); 1760 return err; 1761 } 1762 1763 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry, 1764 struct iattr *attr) 1765 { 1766 struct inode *inode = d_inode(entry); 1767 struct fuse_conn *fc = get_fuse_conn(inode); 1768 struct file *file = (attr->ia_valid & ATTR_FILE) ? 
attr->ia_file : NULL; 1769 int ret; 1770 1771 if (fuse_is_bad(inode)) 1772 return -EIO; 1773 1774 if (!fuse_allow_current_process(get_fuse_conn(inode))) 1775 return -EACCES; 1776 1777 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) { 1778 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | 1779 ATTR_MODE); 1780 1781 /* 1782 * The only sane way to reliably kill suid/sgid is to do it in 1783 * the userspace filesystem 1784 * 1785 * This should be done on write(), truncate() and chown(). 1786 */ 1787 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) { 1788 /* 1789 * ia_mode calculation may have used stale i_mode. 1790 * Refresh and recalculate. 1791 */ 1792 ret = fuse_do_getattr(inode, NULL, file); 1793 if (ret) 1794 return ret; 1795 1796 attr->ia_mode = inode->i_mode; 1797 if (inode->i_mode & S_ISUID) { 1798 attr->ia_valid |= ATTR_MODE; 1799 attr->ia_mode &= ~S_ISUID; 1800 } 1801 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1802 attr->ia_valid |= ATTR_MODE; 1803 attr->ia_mode &= ~S_ISGID; 1804 } 1805 } 1806 } 1807 if (!attr->ia_valid) 1808 return 0; 1809 1810 ret = fuse_do_setattr(entry, attr, file); 1811 if (!ret) { 1812 /* 1813 * If filesystem supports acls it may have updated acl xattrs in 1814 * the filesystem, so forget cached acls for the inode. 
1815 */ 1816 if (fc->posix_acl) 1817 forget_all_cached_acls(inode); 1818 1819 /* Directory mode changed, may need to revalidate access */ 1820 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE)) 1821 fuse_invalidate_entry_cache(entry); 1822 } 1823 return ret; 1824 } 1825 1826 static int fuse_getattr(struct user_namespace *mnt_userns, 1827 const struct path *path, struct kstat *stat, 1828 u32 request_mask, unsigned int flags) 1829 { 1830 struct inode *inode = d_inode(path->dentry); 1831 struct fuse_conn *fc = get_fuse_conn(inode); 1832 1833 if (fuse_is_bad(inode)) 1834 return -EIO; 1835 1836 if (!fuse_allow_current_process(fc)) { 1837 if (!request_mask) { 1838 /* 1839 * If user explicitly requested *nothing* then don't 1840 * error out, but return st_dev only. 1841 */ 1842 stat->result_mask = 0; 1843 stat->dev = inode->i_sb->s_dev; 1844 return 0; 1845 } 1846 return -EACCES; 1847 } 1848 1849 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); 1850 } 1851 1852 static const struct inode_operations fuse_dir_inode_operations = { 1853 .lookup = fuse_lookup, 1854 .mkdir = fuse_mkdir, 1855 .symlink = fuse_symlink, 1856 .unlink = fuse_unlink, 1857 .rmdir = fuse_rmdir, 1858 .rename = fuse_rename2, 1859 .link = fuse_link, 1860 .setattr = fuse_setattr, 1861 .create = fuse_create, 1862 .atomic_open = fuse_atomic_open, 1863 .mknod = fuse_mknod, 1864 .permission = fuse_permission, 1865 .getattr = fuse_getattr, 1866 .listxattr = fuse_listxattr, 1867 .get_acl = fuse_get_acl, 1868 .set_acl = fuse_set_acl, 1869 }; 1870 1871 static const struct file_operations fuse_dir_operations = { 1872 .llseek = generic_file_llseek, 1873 .read = generic_read_dir, 1874 .iterate_shared = fuse_readdir, 1875 .open = fuse_dir_open, 1876 .release = fuse_dir_release, 1877 .fsync = fuse_dir_fsync, 1878 .unlocked_ioctl = fuse_dir_ioctl, 1879 .compat_ioctl = fuse_dir_compat_ioctl, 1880 }; 1881 1882 static const struct inode_operations fuse_common_inode_operations = { 1883 .setattr = 
fuse_setattr, 1884 .permission = fuse_permission, 1885 .getattr = fuse_getattr, 1886 .listxattr = fuse_listxattr, 1887 .get_acl = fuse_get_acl, 1888 .set_acl = fuse_set_acl, 1889 }; 1890 1891 static const struct inode_operations fuse_symlink_inode_operations = { 1892 .setattr = fuse_setattr, 1893 .get_link = fuse_get_link, 1894 .getattr = fuse_getattr, 1895 .listxattr = fuse_listxattr, 1896 }; 1897 1898 void fuse_init_common(struct inode *inode) 1899 { 1900 inode->i_op = &fuse_common_inode_operations; 1901 } 1902 1903 void fuse_init_dir(struct inode *inode) 1904 { 1905 struct fuse_inode *fi = get_fuse_inode(inode); 1906 1907 inode->i_op = &fuse_dir_inode_operations; 1908 inode->i_fop = &fuse_dir_operations; 1909 1910 spin_lock_init(&fi->rdc.lock); 1911 fi->rdc.cached = false; 1912 fi->rdc.size = 0; 1913 fi->rdc.pos = 0; 1914 fi->rdc.version = 0; 1915 } 1916 1917 static int fuse_symlink_readpage(struct file *null, struct page *page) 1918 { 1919 int err = fuse_readlink_page(page->mapping->host, page); 1920 1921 if (!err) 1922 SetPageUptodate(page); 1923 1924 unlock_page(page); 1925 1926 return err; 1927 } 1928 1929 static const struct address_space_operations fuse_symlink_aops = { 1930 .readpage = fuse_symlink_readpage, 1931 }; 1932 1933 void fuse_init_symlink(struct inode *inode) 1934 { 1935 inode->i_op = &fuse_symlink_inode_operations; 1936 inode->i_data.a_ops = &fuse_symlink_aops; 1937 inode_nohighmem(inode); 1938 } 1939