1 /* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/string.h> 8 #include <linux/mm.h> 9 #include <linux/file.h> 10 #include <linux/fdtable.h> 11 #include <linux/quotaops.h> 12 #include <linux/fsnotify.h> 13 #include <linux/module.h> 14 #include <linux/slab.h> 15 #include <linux/tty.h> 16 #include <linux/namei.h> 17 #include <linux/backing-dev.h> 18 #include <linux/capability.h> 19 #include <linux/security.h> 20 #include <linux/mount.h> 21 #include <linux/vfs.h> 22 #include <linux/fcntl.h> 23 #include <asm/uaccess.h> 24 #include <linux/fs.h> 25 #include <linux/personality.h> 26 #include <linux/pagemap.h> 27 #include <linux/syscalls.h> 28 #include <linux/rcupdate.h> 29 #include <linux/audit.h> 30 #include <linux/falloc.h> 31 32 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 33 { 34 int retval = -ENODEV; 35 36 if (dentry) { 37 retval = -ENOSYS; 38 if (dentry->d_sb->s_op->statfs) { 39 memset(buf, 0, sizeof(*buf)); 40 retval = security_sb_statfs(dentry); 41 if (retval) 42 return retval; 43 retval = dentry->d_sb->s_op->statfs(dentry, buf); 44 if (retval == 0 && buf->f_frsize == 0) 45 buf->f_frsize = buf->f_bsize; 46 } 47 } 48 return retval; 49 } 50 51 EXPORT_SYMBOL(vfs_statfs); 52 53 static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 54 { 55 struct kstatfs st; 56 int retval; 57 58 retval = vfs_statfs(dentry, &st); 59 if (retval) 60 return retval; 61 62 if (sizeof(*buf) == sizeof(st)) 63 memcpy(buf, &st, sizeof(st)); 64 else { 65 if (sizeof buf->f_blocks == 4) { 66 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 67 0xffffffff00000000ULL) 68 return -EOVERFLOW; 69 /* 70 * f_files and f_ffree may be -1; it's okay to stuff 71 * that into 32 bits 72 */ 73 if (st.f_files != -1 && 74 (st.f_files & 0xffffffff00000000ULL)) 75 return -EOVERFLOW; 76 if (st.f_ffree != -1 && 77 (st.f_ffree & 0xffffffff00000000ULL)) 78 return -EOVERFLOW; 79 } 80 81 buf->f_type = st.f_type; 82 buf->f_bsize = st.f_bsize; 83 
buf->f_blocks = st.f_blocks; 84 buf->f_bfree = st.f_bfree; 85 buf->f_bavail = st.f_bavail; 86 buf->f_files = st.f_files; 87 buf->f_ffree = st.f_ffree; 88 buf->f_fsid = st.f_fsid; 89 buf->f_namelen = st.f_namelen; 90 buf->f_frsize = st.f_frsize; 91 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 92 } 93 return 0; 94 } 95 96 static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 97 { 98 struct kstatfs st; 99 int retval; 100 101 retval = vfs_statfs(dentry, &st); 102 if (retval) 103 return retval; 104 105 if (sizeof(*buf) == sizeof(st)) 106 memcpy(buf, &st, sizeof(st)); 107 else { 108 buf->f_type = st.f_type; 109 buf->f_bsize = st.f_bsize; 110 buf->f_blocks = st.f_blocks; 111 buf->f_bfree = st.f_bfree; 112 buf->f_bavail = st.f_bavail; 113 buf->f_files = st.f_files; 114 buf->f_ffree = st.f_ffree; 115 buf->f_fsid = st.f_fsid; 116 buf->f_namelen = st.f_namelen; 117 buf->f_frsize = st.f_frsize; 118 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 119 } 120 return 0; 121 } 122 123 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 124 { 125 struct nameidata nd; 126 int error; 127 128 error = user_path_walk(path, &nd); 129 if (!error) { 130 struct statfs tmp; 131 error = vfs_statfs_native(nd.path.dentry, &tmp); 132 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 133 error = -EFAULT; 134 path_put(&nd.path); 135 } 136 return error; 137 } 138 139 140 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 141 { 142 struct nameidata nd; 143 long error; 144 145 if (sz != sizeof(*buf)) 146 return -EINVAL; 147 error = user_path_walk(path, &nd); 148 if (!error) { 149 struct statfs64 tmp; 150 error = vfs_statfs64(nd.path.dentry, &tmp); 151 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 152 error = -EFAULT; 153 path_put(&nd.path); 154 } 155 return error; 156 } 157 158 159 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 160 { 161 struct file * file; 162 struct statfs tmp; 
163 int error; 164 165 error = -EBADF; 166 file = fget(fd); 167 if (!file) 168 goto out; 169 error = vfs_statfs_native(file->f_path.dentry, &tmp); 170 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 171 error = -EFAULT; 172 fput(file); 173 out: 174 return error; 175 } 176 177 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 178 { 179 struct file * file; 180 struct statfs64 tmp; 181 int error; 182 183 if (sz != sizeof(*buf)) 184 return -EINVAL; 185 186 error = -EBADF; 187 file = fget(fd); 188 if (!file) 189 goto out; 190 error = vfs_statfs64(file->f_path.dentry, &tmp); 191 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 192 error = -EFAULT; 193 fput(file); 194 out: 195 return error; 196 } 197 198 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 199 struct file *filp) 200 { 201 int err; 202 struct iattr newattrs; 203 204 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 205 if (length < 0) 206 return -EINVAL; 207 208 newattrs.ia_size = length; 209 newattrs.ia_valid = ATTR_SIZE | time_attrs; 210 if (filp) { 211 newattrs.ia_file = filp; 212 newattrs.ia_valid |= ATTR_FILE; 213 } 214 215 /* Remove suid/sgid on truncate too */ 216 newattrs.ia_valid |= should_remove_suid(dentry); 217 218 mutex_lock(&dentry->d_inode->i_mutex); 219 err = notify_change(dentry, &newattrs); 220 mutex_unlock(&dentry->d_inode->i_mutex); 221 return err; 222 } 223 224 static long do_sys_truncate(const char __user * path, loff_t length) 225 { 226 struct nameidata nd; 227 struct inode * inode; 228 int error; 229 230 error = -EINVAL; 231 if (length < 0) /* sorry, but loff_t says... 
*/ 232 goto out; 233 234 error = user_path_walk(path, &nd); 235 if (error) 236 goto out; 237 inode = nd.path.dentry->d_inode; 238 239 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 240 error = -EISDIR; 241 if (S_ISDIR(inode->i_mode)) 242 goto dput_and_out; 243 244 error = -EINVAL; 245 if (!S_ISREG(inode->i_mode)) 246 goto dput_and_out; 247 248 error = mnt_want_write(nd.path.mnt); 249 if (error) 250 goto dput_and_out; 251 252 error = vfs_permission(&nd, MAY_WRITE); 253 if (error) 254 goto mnt_drop_write_and_out; 255 256 error = -EPERM; 257 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 258 goto mnt_drop_write_and_out; 259 260 error = get_write_access(inode); 261 if (error) 262 goto mnt_drop_write_and_out; 263 264 /* 265 * Make sure that there are no leases. get_write_access() protects 266 * against the truncate racing with a lease-granting setlease(). 267 */ 268 error = break_lease(inode, FMODE_WRITE); 269 if (error) 270 goto put_write_and_out; 271 272 error = locks_verify_truncate(inode, NULL, length); 273 if (!error) { 274 DQUOT_INIT(inode); 275 error = do_truncate(nd.path.dentry, length, 0, NULL); 276 } 277 278 put_write_and_out: 279 put_write_access(inode); 280 mnt_drop_write_and_out: 281 mnt_drop_write(nd.path.mnt); 282 dput_and_out: 283 path_put(&nd.path); 284 out: 285 return error; 286 } 287 288 asmlinkage long sys_truncate(const char __user * path, unsigned long length) 289 { 290 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 291 return do_sys_truncate(path, (long)length); 292 } 293 294 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 295 { 296 struct inode * inode; 297 struct dentry *dentry; 298 struct file * file; 299 int error; 300 301 error = -EINVAL; 302 if (length < 0) 303 goto out; 304 error = -EBADF; 305 file = fget(fd); 306 if (!file) 307 goto out; 308 309 /* explicitly opened as large or we are on 64-bit box */ 310 if (file->f_flags & O_LARGEFILE) 311 small = 0; 312 313 dentry = 
file->f_path.dentry; 314 inode = dentry->d_inode; 315 error = -EINVAL; 316 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 317 goto out_putf; 318 319 error = -EINVAL; 320 /* Cannot ftruncate over 2^31 bytes without large file support */ 321 if (small && length > MAX_NON_LFS) 322 goto out_putf; 323 324 error = -EPERM; 325 if (IS_APPEND(inode)) 326 goto out_putf; 327 328 error = locks_verify_truncate(inode, file, length); 329 if (!error) 330 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 331 out_putf: 332 fput(file); 333 out: 334 return error; 335 } 336 337 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 338 { 339 long ret = do_sys_ftruncate(fd, length, 1); 340 /* avoid REGPARM breakage on x86: */ 341 asmlinkage_protect(2, ret, fd, length); 342 return ret; 343 } 344 345 /* LFS versions of truncate are only needed on 32 bit machines */ 346 #if BITS_PER_LONG == 32 347 asmlinkage long sys_truncate64(const char __user * path, loff_t length) 348 { 349 return do_sys_truncate(path, length); 350 } 351 352 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 353 { 354 long ret = do_sys_ftruncate(fd, length, 0); 355 /* avoid REGPARM breakage on x86: */ 356 asmlinkage_protect(2, ret, fd, length); 357 return ret; 358 } 359 #endif 360 361 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) 362 { 363 struct file *file; 364 struct inode *inode; 365 long ret = -EINVAL; 366 367 if (offset < 0 || len <= 0) 368 goto out; 369 370 /* Return error if mode is not supported */ 371 ret = -EOPNOTSUPP; 372 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 373 goto out; 374 375 ret = -EBADF; 376 file = fget(fd); 377 if (!file) 378 goto out; 379 if (!(file->f_mode & FMODE_WRITE)) 380 goto out_fput; 381 /* 382 * Revalidate the write permissions, in case security policy has 383 * changed since the files were opened. 
384 */ 385 ret = security_file_permission(file, MAY_WRITE); 386 if (ret) 387 goto out_fput; 388 389 inode = file->f_path.dentry->d_inode; 390 391 ret = -ESPIPE; 392 if (S_ISFIFO(inode->i_mode)) 393 goto out_fput; 394 395 ret = -ENODEV; 396 /* 397 * Let individual file system decide if it supports preallocation 398 * for directories or not. 399 */ 400 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 401 goto out_fput; 402 403 ret = -EFBIG; 404 /* Check for wrap through zero too */ 405 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 406 goto out_fput; 407 408 if (inode->i_op && inode->i_op->fallocate) 409 ret = inode->i_op->fallocate(inode, mode, offset, len); 410 else 411 ret = -EOPNOTSUPP; 412 413 out_fput: 414 fput(file); 415 out: 416 return ret; 417 } 418 419 /* 420 * access() needs to use the real uid/gid, not the effective uid/gid. 421 * We do this by temporarily clearing all FS-related capabilities and 422 * switching the fsuid/fsgid around to the real ones. 423 */ 424 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 425 { 426 struct nameidata nd; 427 int old_fsuid, old_fsgid; 428 kernel_cap_t old_cap; 429 int res; 430 431 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 432 return -EINVAL; 433 434 old_fsuid = current->fsuid; 435 old_fsgid = current->fsgid; 436 old_cap = current->cap_effective; 437 438 current->fsuid = current->uid; 439 current->fsgid = current->gid; 440 441 /* 442 * Clear the capabilities if we switch to a non-root user 443 * 444 * FIXME: There is a race here against sys_capset. The 445 * capabilities can change yet we will restore the old 446 * value below. We should hold task_capabilities_lock, 447 * but we cannot because user_path_walk can sleep. 
448 */ 449 if (current->uid) 450 cap_clear(current->cap_effective); 451 else 452 current->cap_effective = current->cap_permitted; 453 454 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 455 if (res) 456 goto out; 457 458 res = vfs_permission(&nd, mode); 459 /* SuS v2 requires we report a read only fs too */ 460 if(res || !(mode & S_IWOTH) || 461 special_file(nd.path.dentry->d_inode->i_mode)) 462 goto out_path_release; 463 /* 464 * This is a rare case where using __mnt_is_readonly() 465 * is OK without a mnt_want/drop_write() pair. Since 466 * no actual write to the fs is performed here, we do 467 * not need to telegraph to that to anyone. 468 * 469 * By doing this, we accept that this access is 470 * inherently racy and know that the fs may change 471 * state before we even see this result. 472 */ 473 if (__mnt_is_readonly(nd.path.mnt)) 474 res = -EROFS; 475 476 out_path_release: 477 path_put(&nd.path); 478 out: 479 current->fsuid = old_fsuid; 480 current->fsgid = old_fsgid; 481 current->cap_effective = old_cap; 482 483 return res; 484 } 485 486 asmlinkage long sys_access(const char __user *filename, int mode) 487 { 488 return sys_faccessat(AT_FDCWD, filename, mode); 489 } 490 491 asmlinkage long sys_chdir(const char __user * filename) 492 { 493 struct nameidata nd; 494 int error; 495 496 error = __user_walk(filename, 497 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); 498 if (error) 499 goto out; 500 501 error = vfs_permission(&nd, MAY_EXEC); 502 if (error) 503 goto dput_and_out; 504 505 set_fs_pwd(current->fs, &nd.path); 506 507 dput_and_out: 508 path_put(&nd.path); 509 out: 510 return error; 511 } 512 513 asmlinkage long sys_fchdir(unsigned int fd) 514 { 515 struct file *file; 516 struct inode *inode; 517 int error; 518 519 error = -EBADF; 520 file = fget(fd); 521 if (!file) 522 goto out; 523 524 inode = file->f_path.dentry->d_inode; 525 526 error = -ENOTDIR; 527 if (!S_ISDIR(inode->i_mode)) 528 goto out_putf; 529 530 error = 
file_permission(file, MAY_EXEC); 531 if (!error) 532 set_fs_pwd(current->fs, &file->f_path); 533 out_putf: 534 fput(file); 535 out: 536 return error; 537 } 538 539 asmlinkage long sys_chroot(const char __user * filename) 540 { 541 struct nameidata nd; 542 int error; 543 544 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 545 if (error) 546 goto out; 547 548 error = vfs_permission(&nd, MAY_EXEC); 549 if (error) 550 goto dput_and_out; 551 552 error = -EPERM; 553 if (!capable(CAP_SYS_CHROOT)) 554 goto dput_and_out; 555 556 set_fs_root(current->fs, &nd.path); 557 set_fs_altroot(); 558 error = 0; 559 dput_and_out: 560 path_put(&nd.path); 561 out: 562 return error; 563 } 564 565 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 566 { 567 struct inode * inode; 568 struct dentry * dentry; 569 struct file * file; 570 int err = -EBADF; 571 struct iattr newattrs; 572 573 file = fget(fd); 574 if (!file) 575 goto out; 576 577 dentry = file->f_path.dentry; 578 inode = dentry->d_inode; 579 580 audit_inode(NULL, dentry); 581 582 err = mnt_want_write(file->f_path.mnt); 583 if (err) 584 goto out_putf; 585 err = -EPERM; 586 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 587 goto out_drop_write; 588 mutex_lock(&inode->i_mutex); 589 if (mode == (mode_t) -1) 590 mode = inode->i_mode; 591 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 592 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 593 err = notify_change(dentry, &newattrs); 594 mutex_unlock(&inode->i_mutex); 595 596 out_drop_write: 597 mnt_drop_write(file->f_path.mnt); 598 out_putf: 599 fput(file); 600 out: 601 return err; 602 } 603 604 asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 605 mode_t mode) 606 { 607 struct nameidata nd; 608 struct inode * inode; 609 int error; 610 struct iattr newattrs; 611 612 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 613 if (error) 614 goto out; 615 inode = nd.path.dentry->d_inode; 616 617 error = 
mnt_want_write(nd.path.mnt); 618 if (error) 619 goto dput_and_out; 620 621 error = -EPERM; 622 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 623 goto out_drop_write; 624 625 mutex_lock(&inode->i_mutex); 626 if (mode == (mode_t) -1) 627 mode = inode->i_mode; 628 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 629 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 630 error = notify_change(nd.path.dentry, &newattrs); 631 mutex_unlock(&inode->i_mutex); 632 633 out_drop_write: 634 mnt_drop_write(nd.path.mnt); 635 dput_and_out: 636 path_put(&nd.path); 637 out: 638 return error; 639 } 640 641 asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 642 { 643 return sys_fchmodat(AT_FDCWD, filename, mode); 644 } 645 646 static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 647 { 648 struct inode * inode; 649 int error; 650 struct iattr newattrs; 651 652 error = -ENOENT; 653 if (!(inode = dentry->d_inode)) { 654 printk(KERN_ERR "chown_common: NULL inode\n"); 655 goto out; 656 } 657 error = -EPERM; 658 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 659 goto out; 660 newattrs.ia_valid = ATTR_CTIME; 661 if (user != (uid_t) -1) { 662 newattrs.ia_valid |= ATTR_UID; 663 newattrs.ia_uid = user; 664 } 665 if (group != (gid_t) -1) { 666 newattrs.ia_valid |= ATTR_GID; 667 newattrs.ia_gid = group; 668 } 669 if (!S_ISDIR(inode->i_mode)) 670 newattrs.ia_valid |= 671 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 672 mutex_lock(&inode->i_mutex); 673 error = notify_change(dentry, &newattrs); 674 mutex_unlock(&inode->i_mutex); 675 out: 676 return error; 677 } 678 679 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 680 { 681 struct nameidata nd; 682 int error; 683 684 error = user_path_walk(filename, &nd); 685 if (error) 686 goto out; 687 error = mnt_want_write(nd.path.mnt); 688 if (error) 689 goto out_release; 690 error = chown_common(nd.path.dentry, user, group); 691 mnt_drop_write(nd.path.mnt); 692 out_release: 
693 path_put(&nd.path); 694 out: 695 return error; 696 } 697 698 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 699 gid_t group, int flag) 700 { 701 struct nameidata nd; 702 int error = -EINVAL; 703 int follow; 704 705 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 706 goto out; 707 708 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 709 error = __user_walk_fd(dfd, filename, follow, &nd); 710 if (error) 711 goto out; 712 error = mnt_want_write(nd.path.mnt); 713 if (error) 714 goto out_release; 715 error = chown_common(nd.path.dentry, user, group); 716 mnt_drop_write(nd.path.mnt); 717 out_release: 718 path_put(&nd.path); 719 out: 720 return error; 721 } 722 723 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 724 { 725 struct nameidata nd; 726 int error; 727 728 error = user_path_walk_link(filename, &nd); 729 if (error) 730 goto out; 731 error = mnt_want_write(nd.path.mnt); 732 if (error) 733 goto out_release; 734 error = chown_common(nd.path.dentry, user, group); 735 mnt_drop_write(nd.path.mnt); 736 out_release: 737 path_put(&nd.path); 738 out: 739 return error; 740 } 741 742 743 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 744 { 745 struct file * file; 746 int error = -EBADF; 747 struct dentry * dentry; 748 749 file = fget(fd); 750 if (!file) 751 goto out; 752 753 error = mnt_want_write(file->f_path.mnt); 754 if (error) 755 goto out_fput; 756 dentry = file->f_path.dentry; 757 audit_inode(NULL, dentry); 758 error = chown_common(dentry, user, group); 759 mnt_drop_write(file->f_path.mnt); 760 out_fput: 761 fput(file); 762 out: 763 return error; 764 } 765 766 /* 767 * You have to be very careful that these write 768 * counts get cleaned up in error cases and 769 * upon __fput(). This should probably never 770 * be called outside of __dentry_open(). 
 */
static inline int __get_file_write_access(struct inode *inode,
					  struct vfsmount *mnt)
{
	int error;
	error = get_write_access(inode);
	if (error)
		return error;
	/*
	 * Do not take mount writer counts on
	 * special files since no writes to
	 * the mount itself will occur.
	 */
	if (!special_file(inode->i_mode)) {
		/*
		 * Balanced in __fput()
		 */
		error = mnt_want_write(mnt);
		/* Undo the inode write count if the mount refused us. */
		if (error)
			put_write_access(inode);
	}
	return error;
}

/*
 * Finish turning the preallocated file @f into an open file for
 * @dentry on @mnt.
 *
 * @flags are the open flags; the access mode is derived from
 * (flags + 1) & O_ACCMODE.  @open, when non-NULL, is called instead of
 * f->f_op->open.
 *
 * Returns @f on success.  On the error paths below (cleanup_all /
 * cleanup_file) the write counts taken here are dropped again, @f is
 * released with put_filp(), the dentry and mount references are dropped
 * with dput()/mntput(), and ERR_PTR(error) is returned.  The late O_DIRECT
 * check fails with ERR_PTR(-EINVAL) after fput(f); at that point f->f_path
 * is already set, so the references are presumably released through the
 * normal fput() path rather than here.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
					int flags, struct file *f,
					int (*open)(struct inode *, struct file *))
{
	struct inode *inode;
	int error;

	f->f_flags = flags;
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = __get_file_write_access(inode, mnt);
		/* Nothing but @f itself has been set up yet. */
		if (error)
			goto cleanup_file;
		if (!special_file(inode->i_mode))
			file_take_write(f);
	}

	f->f_mapping = inode->i_mapping;
	f->f_path.dentry = dentry;
	f->f_path.mnt = mnt;
	f->f_pos = 0;
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);

	error = security_dentry_open(f);
	if (error)
		goto cleanup_all;

	/* Fall back to the file_operations ->open when no callback was given. */
	if (!open && f->f_op)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}

	/* These flags only matter while opening; do not keep them in f_flags. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
		    (!f->f_mapping->a_ops->get_xip_mem))) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE) {
		put_write_access(inode);
		if (!special_file(inode->i_mode)) {
			/*
			 * We don't consider this a real
			 * mnt_want/drop_write() pair
			 * because it all happened right
			 * here, so just reset the state.
			 */
			file_reset_write(f);
			mnt_drop_write(mnt);
		}
	}
	file_kill(f);
	/* Clear the path before put_filp(); the references are dropped below. */
	f->f_path.dentry = NULL;
	f->f_path.mnt = NULL;
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

/**
 * lookup_instantiate_filp - instantiates the open intent filp
 * @nd: pointer to nameidata
 * @dentry: pointer to dentry
 * @open: open callback
 *
 * Helper for filesystems that want to use lookup open intents and pass back
 * a fully instantiated struct file to the caller.
 * This function is meant to be called from within a filesystem's
 * lookup method.
 * Beware of calling it for non-regular files! Those ->open methods might block
 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
 * leading to a deadlock, as nobody can open that fifo anymore, because
 * another process to open fifo will block on locked parent when doing lookup).
 * Note that in case of error, nd->intent.open.file is destroyed, but the
 * path information remains valid.
 * If the open callback is set to NULL, then the standard f_op->open()
 * filesystem callback is substituted.
892 */ 893 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 894 int (*open)(struct inode *, struct file *)) 895 { 896 if (IS_ERR(nd->intent.open.file)) 897 goto out; 898 if (IS_ERR(dentry)) 899 goto out_err; 900 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 901 nd->intent.open.flags - 1, 902 nd->intent.open.file, 903 open); 904 out: 905 return nd->intent.open.file; 906 out_err: 907 release_open_intent(nd); 908 nd->intent.open.file = (struct file *)dentry; 909 goto out; 910 } 911 EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 912 913 /** 914 * nameidata_to_filp - convert a nameidata to an open filp. 915 * @nd: pointer to nameidata 916 * @flags: open flags 917 * 918 * Note that this function destroys the original nameidata 919 */ 920 struct file *nameidata_to_filp(struct nameidata *nd, int flags) 921 { 922 struct file *filp; 923 924 /* Pick up the filp from the open intent */ 925 filp = nd->intent.open.file; 926 /* Has the filesystem initialised the file for us? */ 927 if (filp->f_path.dentry == NULL) 928 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 929 NULL); 930 else 931 path_put(&nd->path); 932 return filp; 933 } 934 935 /* 936 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 937 * error. 938 */ 939 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 940 { 941 int error; 942 struct file *f; 943 944 /* 945 * We must always pass in a valid mount pointer. Historically 946 * callers got away with not passing it, but we must enforce this at 947 * the earliest possible point now to avoid strange problems deep in the 948 * filesystem stack. 
949 */ 950 if (!mnt) { 951 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__); 952 dump_stack(); 953 return ERR_PTR(-EINVAL); 954 } 955 956 error = -ENFILE; 957 f = get_empty_filp(); 958 if (f == NULL) { 959 dput(dentry); 960 mntput(mnt); 961 return ERR_PTR(error); 962 } 963 964 return __dentry_open(dentry, mnt, flags, f, NULL); 965 } 966 EXPORT_SYMBOL(dentry_open); 967 968 /* 969 * Find an empty file descriptor entry, and mark it busy. 970 */ 971 int get_unused_fd_flags(int flags) 972 { 973 struct files_struct * files = current->files; 974 int fd, error; 975 struct fdtable *fdt; 976 977 error = -EMFILE; 978 spin_lock(&files->file_lock); 979 980 repeat: 981 fdt = files_fdtable(files); 982 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, 983 files->next_fd); 984 985 /* 986 * N.B. For clone tasks sharing a files structure, this test 987 * will limit the total number of files that can be opened. 988 */ 989 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 990 goto out; 991 992 /* Do we need to expand the fd array or fd set? */ 993 error = expand_files(files, fd); 994 if (error < 0) 995 goto out; 996 997 if (error) { 998 /* 999 * If we needed to expand the fs array we 1000 * might have blocked - try again. 
1001 */ 1002 error = -EMFILE; 1003 goto repeat; 1004 } 1005 1006 FD_SET(fd, fdt->open_fds); 1007 if (flags & O_CLOEXEC) 1008 FD_SET(fd, fdt->close_on_exec); 1009 else 1010 FD_CLR(fd, fdt->close_on_exec); 1011 files->next_fd = fd + 1; 1012 #if 1 1013 /* Sanity check */ 1014 if (fdt->fd[fd] != NULL) { 1015 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 1016 fdt->fd[fd] = NULL; 1017 } 1018 #endif 1019 error = fd; 1020 1021 out: 1022 spin_unlock(&files->file_lock); 1023 return error; 1024 } 1025 1026 int get_unused_fd(void) 1027 { 1028 return get_unused_fd_flags(0); 1029 } 1030 1031 EXPORT_SYMBOL(get_unused_fd); 1032 1033 static void __put_unused_fd(struct files_struct *files, unsigned int fd) 1034 { 1035 struct fdtable *fdt = files_fdtable(files); 1036 __FD_CLR(fd, fdt->open_fds); 1037 if (fd < files->next_fd) 1038 files->next_fd = fd; 1039 } 1040 1041 void put_unused_fd(unsigned int fd) 1042 { 1043 struct files_struct *files = current->files; 1044 spin_lock(&files->file_lock); 1045 __put_unused_fd(files, fd); 1046 spin_unlock(&files->file_lock); 1047 } 1048 1049 EXPORT_SYMBOL(put_unused_fd); 1050 1051 /* 1052 * Install a file pointer in the fd array. 1053 * 1054 * The VFS is full of places where we drop the files lock between 1055 * setting the open_fds bitmap and installing the file in the file 1056 * array. At any such point, we are vulnerable to a dup2() race 1057 * installing a file in the array before us. We need to detect this and 1058 * fput() the struct file we are about to overwrite in this case. 1059 * 1060 * It should never happen - if we allow dup2() do it, _really_ bad things 1061 * will follow. 
1062 */ 1063 1064 void fd_install(unsigned int fd, struct file *file) 1065 { 1066 struct files_struct *files = current->files; 1067 struct fdtable *fdt; 1068 spin_lock(&files->file_lock); 1069 fdt = files_fdtable(files); 1070 BUG_ON(fdt->fd[fd] != NULL); 1071 rcu_assign_pointer(fdt->fd[fd], file); 1072 spin_unlock(&files->file_lock); 1073 } 1074 1075 EXPORT_SYMBOL(fd_install); 1076 1077 long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 1078 { 1079 char *tmp = getname(filename); 1080 int fd = PTR_ERR(tmp); 1081 1082 if (!IS_ERR(tmp)) { 1083 fd = get_unused_fd_flags(flags); 1084 if (fd >= 0) { 1085 struct file *f = do_filp_open(dfd, tmp, flags, mode); 1086 if (IS_ERR(f)) { 1087 put_unused_fd(fd); 1088 fd = PTR_ERR(f); 1089 } else { 1090 fsnotify_open(f->f_path.dentry); 1091 fd_install(fd, f); 1092 } 1093 } 1094 putname(tmp); 1095 } 1096 return fd; 1097 } 1098 1099 asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1100 { 1101 long ret; 1102 1103 if (force_o_largefile()) 1104 flags |= O_LARGEFILE; 1105 1106 ret = do_sys_open(AT_FDCWD, filename, flags, mode); 1107 /* avoid REGPARM breakage on x86: */ 1108 asmlinkage_protect(3, ret, filename, flags, mode); 1109 return ret; 1110 } 1111 1112 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1113 int mode) 1114 { 1115 long ret; 1116 1117 if (force_o_largefile()) 1118 flags |= O_LARGEFILE; 1119 1120 ret = do_sys_open(dfd, filename, flags, mode); 1121 /* avoid REGPARM breakage on x86: */ 1122 asmlinkage_protect(4, ret, dfd, filename, flags, mode); 1123 return ret; 1124 } 1125 1126 #ifndef __alpha__ 1127 1128 /* 1129 * For backward compatibility? Maybe this should be moved 1130 * into arch/i386 instead? 1131 */ 1132 asmlinkage long sys_creat(const char __user * pathname, int mode) 1133 { 1134 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1135 } 1136 1137 #endif 1138 1139 /* 1140 * "id" is the POSIX thread ID. 
We use the 1141 * files pointer for this.. 1142 */ 1143 int filp_close(struct file *filp, fl_owner_t id) 1144 { 1145 int retval = 0; 1146 1147 if (!file_count(filp)) { 1148 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1149 return 0; 1150 } 1151 1152 if (filp->f_op && filp->f_op->flush) 1153 retval = filp->f_op->flush(filp, id); 1154 1155 dnotify_flush(filp, id); 1156 locks_remove_posix(filp, id); 1157 fput(filp); 1158 return retval; 1159 } 1160 1161 EXPORT_SYMBOL(filp_close); 1162 1163 /* 1164 * Careful here! We test whether the file pointer is NULL before 1165 * releasing the fd. This ensures that one clone task can't release 1166 * an fd while another clone is opening it. 1167 */ 1168 asmlinkage long sys_close(unsigned int fd) 1169 { 1170 struct file * filp; 1171 struct files_struct *files = current->files; 1172 struct fdtable *fdt; 1173 int retval; 1174 1175 spin_lock(&files->file_lock); 1176 fdt = files_fdtable(files); 1177 if (fd >= fdt->max_fds) 1178 goto out_unlock; 1179 filp = fdt->fd[fd]; 1180 if (!filp) 1181 goto out_unlock; 1182 rcu_assign_pointer(fdt->fd[fd], NULL); 1183 FD_CLR(fd, fdt->close_on_exec); 1184 __put_unused_fd(files, fd); 1185 spin_unlock(&files->file_lock); 1186 retval = filp_close(filp, files); 1187 1188 /* can't restart close syscall because file table entry was cleared */ 1189 if (unlikely(retval == -ERESTARTSYS || 1190 retval == -ERESTARTNOINTR || 1191 retval == -ERESTARTNOHAND || 1192 retval == -ERESTART_RESTARTBLOCK)) 1193 retval = -EINTR; 1194 1195 return retval; 1196 1197 out_unlock: 1198 spin_unlock(&files->file_lock); 1199 return -EBADF; 1200 } 1201 1202 EXPORT_SYMBOL(sys_close); 1203 1204 /* 1205 * This routine simulates a hangup on the tty, to arrange that users 1206 * are given clean terminals at login time. 
1207 */ 1208 asmlinkage long sys_vhangup(void) 1209 { 1210 if (capable(CAP_SYS_TTY_CONFIG)) { 1211 /* XXX: this needs locking */ 1212 tty_vhangup(current->signal->tty); 1213 return 0; 1214 } 1215 return -EPERM; 1216 } 1217 1218 /* 1219 * Called when an inode is about to be open. 1220 * We use this to disallow opening large files on 32bit systems if 1221 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1222 * on this flag in sys_open. 1223 */ 1224 int generic_file_open(struct inode * inode, struct file * filp) 1225 { 1226 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1227 return -EOVERFLOW; 1228 return 0; 1229 } 1230 1231 EXPORT_SYMBOL(generic_file_open); 1232 1233 /* 1234 * This is used by subsystems that don't want seekable 1235 * file descriptors 1236 */ 1237 int nonseekable_open(struct inode *inode, struct file *filp) 1238 { 1239 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1240 return 0; 1241 } 1242 1243 EXPORT_SYMBOL(nonseekable_open); 1244