1 /* 2 * linux/fs/open.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/string.h> 8 #include <linux/mm.h> 9 #include <linux/file.h> 10 #include <linux/quotaops.h> 11 #include <linux/fsnotify.h> 12 #include <linux/module.h> 13 #include <linux/slab.h> 14 #include <linux/tty.h> 15 #include <linux/namei.h> 16 #include <linux/backing-dev.h> 17 #include <linux/capability.h> 18 #include <linux/security.h> 19 #include <linux/mount.h> 20 #include <linux/vfs.h> 21 #include <linux/fcntl.h> 22 #include <asm/uaccess.h> 23 #include <linux/fs.h> 24 #include <linux/personality.h> 25 #include <linux/pagemap.h> 26 #include <linux/syscalls.h> 27 #include <linux/rcupdate.h> 28 #include <linux/audit.h> 29 30 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 31 { 32 int retval = -ENODEV; 33 34 if (dentry) { 35 retval = -ENOSYS; 36 if (dentry->d_sb->s_op->statfs) { 37 memset(buf, 0, sizeof(*buf)); 38 retval = security_sb_statfs(dentry); 39 if (retval) 40 return retval; 41 retval = dentry->d_sb->s_op->statfs(dentry, buf); 42 if (retval == 0 && buf->f_frsize == 0) 43 buf->f_frsize = buf->f_bsize; 44 } 45 } 46 return retval; 47 } 48 49 EXPORT_SYMBOL(vfs_statfs); 50 51 static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 52 { 53 struct kstatfs st; 54 int retval; 55 56 retval = vfs_statfs(dentry, &st); 57 if (retval) 58 return retval; 59 60 if (sizeof(*buf) == sizeof(st)) 61 memcpy(buf, &st, sizeof(st)); 62 else { 63 if (sizeof buf->f_blocks == 4) { 64 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 65 0xffffffff00000000ULL) 66 return -EOVERFLOW; 67 /* 68 * f_files and f_ffree may be -1; it's okay to stuff 69 * that into 32 bits 70 */ 71 if (st.f_files != -1 && 72 (st.f_files & 0xffffffff00000000ULL)) 73 return -EOVERFLOW; 74 if (st.f_ffree != -1 && 75 (st.f_ffree & 0xffffffff00000000ULL)) 76 return -EOVERFLOW; 77 } 78 79 buf->f_type = st.f_type; 80 buf->f_bsize = st.f_bsize; 81 buf->f_blocks = st.f_blocks; 82 buf->f_bfree = st.f_bfree; 83 buf->f_bavail = st.f_bavail; 84 buf->f_files = st.f_files; 85 buf->f_ffree = st.f_ffree; 86 buf->f_fsid = st.f_fsid; 87 buf->f_namelen = st.f_namelen; 88 buf->f_frsize = st.f_frsize; 89 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 90 } 91 return 0; 92 } 93 94 static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 95 { 96 struct kstatfs st; 97 int retval; 98 99 retval = vfs_statfs(dentry, &st); 100 if (retval) 101 return retval; 102 103 if (sizeof(*buf) == sizeof(st)) 104 memcpy(buf, &st, sizeof(st)); 105 else { 106 buf->f_type = st.f_type; 107 buf->f_bsize = st.f_bsize; 108 buf->f_blocks = st.f_blocks; 109 buf->f_bfree = st.f_bfree; 110 buf->f_bavail = st.f_bavail; 111 buf->f_files = st.f_files; 112 buf->f_ffree = st.f_ffree; 113 buf->f_fsid = st.f_fsid; 114 buf->f_namelen = st.f_namelen; 115 buf->f_frsize = st.f_frsize; 116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 117 } 118 return 0; 119 } 120 121 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 122 { 123 struct nameidata nd; 124 int error; 125 126 error = user_path_walk(path, &nd); 127 if (!error) { 128 struct statfs tmp; 129 error = vfs_statfs_native(nd.dentry, &tmp); 130 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 131 error = -EFAULT; 132 path_release(&nd); 133 } 134 return error; 135 } 136 137 138 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 139 { 140 struct nameidata nd; 141 long error; 142 143 if (sz != sizeof(*buf)) 144 return -EINVAL; 145 error = user_path_walk(path, &nd); 146 if (!error) { 147 struct statfs64 tmp; 148 error = vfs_statfs64(nd.dentry, &tmp); 149 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 150 error = -EFAULT; 151 path_release(&nd); 152 } 153 return error; 154 } 155 156 157 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) 158 { 159 struct file * file; 160 struct statfs tmp; 161 int error; 162 163 error = -EBADF; 164 file = fget(fd); 165 if (!file) 166 goto out; 167 error = vfs_statfs_native(file->f_path.dentry, &tmp); 168 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 169 error = -EFAULT; 170 fput(file); 171 out: 172 return error; 173 } 174 175 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) 176 { 177 struct file * file; 178 struct statfs64 tmp; 179 int error; 180 181 if (sz != sizeof(*buf)) 182 return -EINVAL; 183 184 error = -EBADF; 185 file = fget(fd); 186 if (!file) 187 goto out; 188 error = vfs_statfs64(file->f_path.dentry, &tmp); 189 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 190 error = -EFAULT; 191 fput(file); 192 out: 193 return error; 194 } 195 196 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 197 struct file *filp) 198 { 199 int err; 200 struct iattr newattrs; 201 202 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 203 if (length < 0) 204 return -EINVAL; 205 206 newattrs.ia_size = length; 207 newattrs.ia_valid = ATTR_SIZE | time_attrs; 208 if (filp) { 209 newattrs.ia_file = filp; 210 newattrs.ia_valid |= ATTR_FILE; 211 } 212 213 /* Remove suid/sgid on truncate too */ 214 newattrs.ia_valid |= should_remove_suid(dentry); 215 216 mutex_lock(&dentry->d_inode->i_mutex); 217 err = notify_change(dentry, &newattrs); 218 mutex_unlock(&dentry->d_inode->i_mutex); 219 return err; 220 } 221 222 static long do_sys_truncate(const char __user * path, loff_t length) 223 { 224 struct nameidata nd; 225 struct inode * inode; 226 int error; 227 228 error = -EINVAL; 229 if (length < 0) /* sorry, but loff_t says... */ 230 goto out; 231 232 error = user_path_walk(path, &nd); 233 if (error) 234 goto out; 235 inode = nd.dentry->d_inode; 236 237 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 238 error = -EISDIR; 239 if (S_ISDIR(inode->i_mode)) 240 goto dput_and_out; 241 242 error = -EINVAL; 243 if (!S_ISREG(inode->i_mode)) 244 goto dput_and_out; 245 246 error = vfs_permission(&nd, MAY_WRITE); 247 if (error) 248 goto dput_and_out; 249 250 error = -EROFS; 251 if (IS_RDONLY(inode)) 252 goto dput_and_out; 253 254 error = -EPERM; 255 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 goto dput_and_out; 257 258 /* 259 * Make sure that there are no leases. 260 */ 261 error = break_lease(inode, FMODE_WRITE); 262 if (error) 263 goto dput_and_out; 264 265 error = get_write_access(inode); 266 if (error) 267 goto dput_and_out; 268 269 error = locks_verify_truncate(inode, NULL, length); 270 if (!error) { 271 DQUOT_INIT(inode); 272 error = do_truncate(nd.dentry, length, 0, NULL); 273 } 274 put_write_access(inode); 275 276 dput_and_out: 277 path_release(&nd); 278 out: 279 return error; 280 } 281 282 asmlinkage long sys_truncate(const char __user * path, unsigned long length) 283 { 284 /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ 285 return do_sys_truncate(path, (long)length); 286 } 287 288 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 289 { 290 struct inode * inode; 291 struct dentry *dentry; 292 struct file * file; 293 int error; 294 295 error = -EINVAL; 296 if (length < 0) 297 goto out; 298 error = -EBADF; 299 file = fget(fd); 300 if (!file) 301 goto out; 302 303 /* explicitly opened as large or we are on 64-bit box */ 304 if (file->f_flags & O_LARGEFILE) 305 small = 0; 306 307 dentry = file->f_path.dentry; 308 inode = dentry->d_inode; 309 error = -EINVAL; 310 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 311 goto out_putf; 312 313 error = -EINVAL; 314 /* Cannot ftruncate over 2^31 bytes without large file support */ 315 if (small && length > MAX_NON_LFS) 316 goto out_putf; 317 318 error = -EPERM; 319 if (IS_APPEND(inode)) 320 goto out_putf; 321 322 error = locks_verify_truncate(inode, file, length); 323 if (!error) 324 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 325 out_putf: 326 fput(file); 327 out: 328 return error; 329 } 330 331 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 332 { 333 long ret = do_sys_ftruncate(fd, length, 1); 334 /* avoid REGPARM breakage on x86: */ 335 prevent_tail_call(ret); 336 return ret; 337 } 338 339 /* LFS versions of truncate are only needed on 32 bit machines */ 340 #if BITS_PER_LONG == 32 341 asmlinkage long sys_truncate64(const char __user * path, loff_t length) 342 { 343 return do_sys_truncate(path, length); 344 } 345 346 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 347 { 348 long ret = do_sys_ftruncate(fd, length, 0); 349 /* avoid REGPARM breakage on x86: */ 350 prevent_tail_call(ret); 351 return ret; 352 } 353 #endif 354 355 /* 356 * access() needs to use the real uid/gid, not the effective uid/gid. 357 * We do this by temporarily clearing all FS-related capabilities and 358 * switching the fsuid/fsgid around to the real ones. 359 */ 360 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 361 { 362 struct nameidata nd; 363 int old_fsuid, old_fsgid; 364 kernel_cap_t old_cap; 365 int res; 366 367 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 368 return -EINVAL; 369 370 old_fsuid = current->fsuid; 371 old_fsgid = current->fsgid; 372 old_cap = current->cap_effective; 373 374 current->fsuid = current->uid; 375 current->fsgid = current->gid; 376 377 /* 378 * Clear the capabilities if we switch to a non-root user 379 * 380 * FIXME: There is a race here against sys_capset. The 381 * capabilities can change yet we will restore the old 382 * value below. We should hold task_capabilities_lock, 383 * but we cannot because user_path_walk can sleep. 384 */ 385 if (current->uid) 386 cap_clear(current->cap_effective); 387 else 388 current->cap_effective = current->cap_permitted; 389 390 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 391 if (res) 392 goto out; 393 394 res = vfs_permission(&nd, mode); 395 /* SuS v2 requires we report a read only fs too */ 396 if(res || !(mode & S_IWOTH) || 397 special_file(nd.dentry->d_inode->i_mode)) 398 goto out_path_release; 399 400 if(IS_RDONLY(nd.dentry->d_inode)) 401 res = -EROFS; 402 403 out_path_release: 404 path_release(&nd); 405 out: 406 current->fsuid = old_fsuid; 407 current->fsgid = old_fsgid; 408 current->cap_effective = old_cap; 409 410 return res; 411 } 412 413 asmlinkage long sys_access(const char __user *filename, int mode) 414 { 415 return sys_faccessat(AT_FDCWD, filename, mode); 416 } 417 418 asmlinkage long sys_chdir(const char __user * filename) 419 { 420 struct nameidata nd; 421 int error; 422 423 error = __user_walk(filename, 424 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); 425 if (error) 426 goto out; 427 428 error = vfs_permission(&nd, MAY_EXEC); 429 if (error) 430 goto dput_and_out; 431 432 set_fs_pwd(current->fs, nd.mnt, nd.dentry); 433 434 dput_and_out: 435 path_release(&nd); 436 out: 437 return error; 438 } 439 440 asmlinkage long sys_fchdir(unsigned int fd) 441 { 442 struct file *file; 443 struct dentry *dentry; 444 struct inode *inode; 445 struct vfsmount *mnt; 446 int error; 447 448 error = -EBADF; 449 file = fget(fd); 450 if (!file) 451 goto out; 452 453 dentry = file->f_path.dentry; 454 mnt = file->f_path.mnt; 455 inode = dentry->d_inode; 456 457 error = -ENOTDIR; 458 if (!S_ISDIR(inode->i_mode)) 459 goto out_putf; 460 461 error = file_permission(file, MAY_EXEC); 462 if (!error) 463 set_fs_pwd(current->fs, mnt, dentry); 464 out_putf: 465 fput(file); 466 out: 467 return error; 468 } 469 470 asmlinkage long sys_chroot(const char __user * filename) 471 { 472 struct nameidata nd; 473 int error; 474 475 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 476 if (error) 477 goto out; 478 479 error = vfs_permission(&nd, MAY_EXEC); 480 if (error) 481 goto dput_and_out; 482 483 error = -EPERM; 484 if (!capable(CAP_SYS_CHROOT)) 485 goto dput_and_out; 486 487 set_fs_root(current->fs, nd.mnt, nd.dentry); 488 set_fs_altroot(); 489 error = 0; 490 dput_and_out: 491 path_release(&nd); 492 out: 493 return error; 494 } 495 496 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) 497 { 498 struct inode * inode; 499 struct dentry * dentry; 500 struct file * file; 501 int err = -EBADF; 502 struct iattr newattrs; 503 504 file = fget(fd); 505 if (!file) 506 goto out; 507 508 dentry = file->f_path.dentry; 509 inode = dentry->d_inode; 510 511 audit_inode(NULL, inode); 512 513 err = -EROFS; 514 if (IS_RDONLY(inode)) 515 goto out_putf; 516 err = -EPERM; 517 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 518 goto out_putf; 519 mutex_lock(&inode->i_mutex); 520 if (mode == (mode_t) -1) 521 mode = inode->i_mode; 522 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 523 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 524 err = notify_change(dentry, &newattrs); 525 mutex_unlock(&inode->i_mutex); 526 527 out_putf: 528 fput(file); 529 out: 530 return err; 531 } 532 533 asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 534 mode_t mode) 535 { 536 struct nameidata nd; 537 struct inode * inode; 538 int error; 539 struct iattr newattrs; 540 541 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 542 if (error) 543 goto out; 544 inode = nd.dentry->d_inode; 545 546 error = -EROFS; 547 if (IS_RDONLY(inode)) 548 goto dput_and_out; 549 550 error = -EPERM; 551 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 552 goto dput_and_out; 553 554 mutex_lock(&inode->i_mutex); 555 if (mode == (mode_t) -1) 556 mode = inode->i_mode; 557 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 558 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 559 error = notify_change(nd.dentry, &newattrs); 560 mutex_unlock(&inode->i_mutex); 561 562 dput_and_out: 563 path_release(&nd); 564 out: 565 return error; 566 } 567 568 asmlinkage long sys_chmod(const char __user *filename, mode_t mode) 569 { 570 return sys_fchmodat(AT_FDCWD, filename, mode); 571 } 572 573 static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 574 { 575 struct inode * inode; 576 int error; 577 struct iattr newattrs; 578 579 error = -ENOENT; 580 if (!(inode = dentry->d_inode)) { 581 printk(KERN_ERR "chown_common: NULL inode\n"); 582 goto out; 583 } 584 error = -EROFS; 585 if (IS_RDONLY(inode)) 586 goto out; 587 error = -EPERM; 588 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 589 goto out; 590 newattrs.ia_valid = ATTR_CTIME; 591 if (user != (uid_t) -1) { 592 newattrs.ia_valid |= ATTR_UID; 593 newattrs.ia_uid = user; 594 } 595 if (group != (gid_t) -1) { 596 newattrs.ia_valid |= ATTR_GID; 597 newattrs.ia_gid = group; 598 } 599 if (!S_ISDIR(inode->i_mode)) 600 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; 601 mutex_lock(&inode->i_mutex); 602 error = notify_change(dentry, &newattrs); 603 mutex_unlock(&inode->i_mutex); 604 out: 605 return error; 606 } 607 608 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 609 { 610 struct nameidata nd; 611 int error; 612 613 error = user_path_walk(filename, &nd); 614 if (error) 615 goto out; 616 error = chown_common(nd.dentry, user, group); 617 path_release(&nd); 618 out: 619 return error; 620 } 621 622 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 623 gid_t group, int flag) 624 { 625 struct nameidata nd; 626 int error = -EINVAL; 627 int follow; 628 629 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 630 goto out; 631 632 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 633 error = __user_walk_fd(dfd, filename, follow, &nd); 634 if (error) 635 goto out; 636 error = chown_common(nd.dentry, user, group); 637 path_release(&nd); 638 out: 639 return error; 640 } 641 642 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 643 { 644 struct nameidata nd; 645 int error; 646 647 error = user_path_walk_link(filename, &nd); 648 if (error) 649 goto out; 650 error = chown_common(nd.dentry, user, group); 651 path_release(&nd); 652 out: 653 return error; 654 } 655 656 657 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) 658 { 659 struct file * file; 660 int error = -EBADF; 661 struct dentry * dentry; 662 663 file = fget(fd); 664 if (!file) 665 goto out; 666 667 dentry = file->f_path.dentry; 668 audit_inode(NULL, dentry->d_inode); 669 error = chown_common(dentry, user, group); 670 fput(file); 671 out: 672 return error; 673 } 674 675 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 676 int flags, struct file *f, 677 int (*open)(struct inode *, struct file *)) 678 { 679 struct inode *inode; 680 int error; 681 682 f->f_flags = flags; 683 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 684 FMODE_PREAD | FMODE_PWRITE; 685 inode = dentry->d_inode; 686 if (f->f_mode & FMODE_WRITE) { 687 error = get_write_access(inode); 688 if (error) 689 goto cleanup_file; 690 } 691 692 f->f_mapping = inode->i_mapping; 693 f->f_path.dentry = dentry; 694 f->f_path.mnt = mnt; 695 f->f_pos = 0; 696 f->f_op = fops_get(inode->i_fop); 697 file_move(f, &inode->i_sb->s_files); 698 699 if (!open && f->f_op) 700 open = f->f_op->open; 701 if (open) { 702 error = open(inode, f); 703 if (error) 704 goto cleanup_all; 705 } 706 707 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 708 709 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 710 711 /* NB: we're sure to have correct a_ops only after f_op->open */ 712 if (f->f_flags & O_DIRECT) { 713 if (!f->f_mapping->a_ops || 714 ((!f->f_mapping->a_ops->direct_IO) && 715 (!f->f_mapping->a_ops->get_xip_page))) { 716 fput(f); 717 f = ERR_PTR(-EINVAL); 718 } 719 } 720 721 return f; 722 723 cleanup_all: 724 fops_put(f->f_op); 725 if (f->f_mode & FMODE_WRITE) 726 put_write_access(inode); 727 file_kill(f); 728 f->f_path.dentry = NULL; 729 f->f_path.mnt = NULL; 730 cleanup_file: 731 put_filp(f); 732 dput(dentry); 733 mntput(mnt); 734 return ERR_PTR(error); 735 } 736 737 /* 738 * Note that while the flag value (low two bits) for sys_open means: 739 * 00 - read-only 740 * 01 - write-only 741 * 10 - read-write 742 * 11 - special 743 * it is changed into 744 * 00 - no permissions needed 745 * 01 - read-permission 746 * 10 - write-permission 747 * 11 - read-write 748 * for the internal routines (ie open_namei()/follow_link() etc). 00 is 749 * used by symlinks. 750 */ 751 static struct file *do_filp_open(int dfd, const char *filename, int flags, 752 int mode) 753 { 754 int namei_flags, error; 755 struct nameidata nd; 756 757 namei_flags = flags; 758 if ((namei_flags+1) & O_ACCMODE) 759 namei_flags++; 760 761 error = open_namei(dfd, filename, namei_flags, mode, &nd); 762 if (!error) 763 return nameidata_to_filp(&nd, flags); 764 765 return ERR_PTR(error); 766 } 767 768 struct file *filp_open(const char *filename, int flags, int mode) 769 { 770 return do_filp_open(AT_FDCWD, filename, flags, mode); 771 } 772 EXPORT_SYMBOL(filp_open); 773 774 /** 775 * lookup_instantiate_filp - instantiates the open intent filp 776 * @nd: pointer to nameidata 777 * @dentry: pointer to dentry 778 * @open: open callback 779 * 780 * Helper for filesystems that want to use lookup open intents and pass back 781 * a fully instantiated struct file to the caller. 782 * This function is meant to be called from within a filesystem's 783 * lookup method. 784 * Beware of calling it for non-regular files! Those ->open methods might block 785 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, 786 * leading to a deadlock, as nobody can open that fifo anymore, because 787 * another process to open fifo will block on locked parent when doing lookup). 788 * Note that in case of error, nd->intent.open.file is destroyed, but the 789 * path information remains valid. 790 * If the open callback is set to NULL, then the standard f_op->open() 791 * filesystem callback is substituted. 792 */ 793 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 794 int (*open)(struct inode *, struct file *)) 795 { 796 if (IS_ERR(nd->intent.open.file)) 797 goto out; 798 if (IS_ERR(dentry)) 799 goto out_err; 800 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), 801 nd->intent.open.flags - 1, 802 nd->intent.open.file, 803 open); 804 out: 805 return nd->intent.open.file; 806 out_err: 807 release_open_intent(nd); 808 nd->intent.open.file = (struct file *)dentry; 809 goto out; 810 } 811 EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 812 813 /** 814 * nameidata_to_filp - convert a nameidata to an open filp. 815 * @nd: pointer to nameidata 816 * @flags: open flags 817 * 818 * Note that this function destroys the original nameidata 819 */ 820 struct file *nameidata_to_filp(struct nameidata *nd, int flags) 821 { 822 struct file *filp; 823 824 /* Pick up the filp from the open intent */ 825 filp = nd->intent.open.file; 826 /* Has the filesystem initialised the file for us? */ 827 if (filp->f_path.dentry == NULL) 828 filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); 829 else 830 path_release(nd); 831 return filp; 832 } 833 834 /* 835 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an 836 * error. 837 */ 838 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) 839 { 840 int error; 841 struct file *f; 842 843 error = -ENFILE; 844 f = get_empty_filp(); 845 if (f == NULL) { 846 dput(dentry); 847 mntput(mnt); 848 return ERR_PTR(error); 849 } 850 851 return __dentry_open(dentry, mnt, flags, f, NULL); 852 } 853 EXPORT_SYMBOL(dentry_open); 854 855 /* 856 * Find an empty file descriptor entry, and mark it busy. 857 */ 858 int get_unused_fd(void) 859 { 860 struct files_struct * files = current->files; 861 int fd, error; 862 struct fdtable *fdt; 863 864 error = -EMFILE; 865 spin_lock(&files->file_lock); 866 867 repeat: 868 fdt = files_fdtable(files); 869 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, 870 files->next_fd); 871 872 /* 873 * N.B. For clone tasks sharing a files structure, this test 874 * will limit the total number of files that can be opened. 875 */ 876 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 877 goto out; 878 879 /* Do we need to expand the fd array or fd set? */ 880 error = expand_files(files, fd); 881 if (error < 0) 882 goto out; 883 884 if (error) { 885 /* 886 * If we needed to expand the fs array we 887 * might have blocked - try again. 888 */ 889 error = -EMFILE; 890 goto repeat; 891 } 892 893 FD_SET(fd, fdt->open_fds); 894 FD_CLR(fd, fdt->close_on_exec); 895 files->next_fd = fd + 1; 896 #if 1 897 /* Sanity check */ 898 if (fdt->fd[fd] != NULL) { 899 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); 900 fdt->fd[fd] = NULL; 901 } 902 #endif 903 error = fd; 904 905 out: 906 spin_unlock(&files->file_lock); 907 return error; 908 } 909 910 EXPORT_SYMBOL(get_unused_fd); 911 912 static void __put_unused_fd(struct files_struct *files, unsigned int fd) 913 { 914 struct fdtable *fdt = files_fdtable(files); 915 __FD_CLR(fd, fdt->open_fds); 916 if (fd < files->next_fd) 917 files->next_fd = fd; 918 } 919 920 void fastcall put_unused_fd(unsigned int fd) 921 { 922 struct files_struct *files = current->files; 923 spin_lock(&files->file_lock); 924 __put_unused_fd(files, fd); 925 spin_unlock(&files->file_lock); 926 } 927 928 EXPORT_SYMBOL(put_unused_fd); 929 930 /* 931 * Install a file pointer in the fd array. 932 * 933 * The VFS is full of places where we drop the files lock between 934 * setting the open_fds bitmap and installing the file in the file 935 * array. At any such point, we are vulnerable to a dup2() race 936 * installing a file in the array before us. We need to detect this and 937 * fput() the struct file we are about to overwrite in this case. 938 * 939 * It should never happen - if we allow dup2() do it, _really_ bad things 940 * will follow. 941 */ 942 943 void fastcall fd_install(unsigned int fd, struct file * file) 944 { 945 struct files_struct *files = current->files; 946 struct fdtable *fdt; 947 spin_lock(&files->file_lock); 948 fdt = files_fdtable(files); 949 BUG_ON(fdt->fd[fd] != NULL); 950 rcu_assign_pointer(fdt->fd[fd], file); 951 spin_unlock(&files->file_lock); 952 } 953 954 EXPORT_SYMBOL(fd_install); 955 956 long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 957 { 958 char *tmp = getname(filename); 959 int fd = PTR_ERR(tmp); 960 961 if (!IS_ERR(tmp)) { 962 fd = get_unused_fd(); 963 if (fd >= 0) { 964 struct file *f = do_filp_open(dfd, tmp, flags, mode); 965 if (IS_ERR(f)) { 966 put_unused_fd(fd); 967 fd = PTR_ERR(f); 968 } else { 969 fsnotify_open(f->f_path.dentry); 970 fd_install(fd, f); 971 } 972 } 973 putname(tmp); 974 } 975 return fd; 976 } 977 978 asmlinkage long sys_open(const char __user *filename, int flags, int mode) 979 { 980 long ret; 981 982 if (force_o_largefile()) 983 flags |= O_LARGEFILE; 984 985 ret = do_sys_open(AT_FDCWD, filename, flags, mode); 986 /* avoid REGPARM breakage on x86: */ 987 prevent_tail_call(ret); 988 return ret; 989 } 990 EXPORT_SYMBOL_GPL(sys_open); 991 992 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 993 int mode) 994 { 995 long ret; 996 997 if (force_o_largefile()) 998 flags |= O_LARGEFILE; 999 1000 ret = do_sys_open(dfd, filename, flags, mode); 1001 /* avoid REGPARM breakage on x86: */ 1002 prevent_tail_call(ret); 1003 return ret; 1004 } 1005 1006 #ifndef __alpha__ 1007 1008 /* 1009 * For backward compatibility? Maybe this should be moved 1010 * into arch/i386 instead? 1011 */ 1012 asmlinkage long sys_creat(const char __user * pathname, int mode) 1013 { 1014 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1015 } 1016 1017 #endif 1018 1019 /* 1020 * "id" is the POSIX thread ID. We use the 1021 * files pointer for this.. 1022 */ 1023 int filp_close(struct file *filp, fl_owner_t id) 1024 { 1025 int retval = 0; 1026 1027 if (!file_count(filp)) { 1028 printk(KERN_ERR "VFS: Close: file count is 0\n"); 1029 return 0; 1030 } 1031 1032 if (filp->f_op && filp->f_op->flush) 1033 retval = filp->f_op->flush(filp, id); 1034 1035 dnotify_flush(filp, id); 1036 locks_remove_posix(filp, id); 1037 fput(filp); 1038 return retval; 1039 } 1040 1041 EXPORT_SYMBOL(filp_close); 1042 1043 /* 1044 * Careful here! We test whether the file pointer is NULL before 1045 * releasing the fd. This ensures that one clone task can't release 1046 * an fd while another clone is opening it. 1047 */ 1048 asmlinkage long sys_close(unsigned int fd) 1049 { 1050 struct file * filp; 1051 struct files_struct *files = current->files; 1052 struct fdtable *fdt; 1053 int retval; 1054 1055 spin_lock(&files->file_lock); 1056 fdt = files_fdtable(files); 1057 if (fd >= fdt->max_fds) 1058 goto out_unlock; 1059 filp = fdt->fd[fd]; 1060 if (!filp) 1061 goto out_unlock; 1062 rcu_assign_pointer(fdt->fd[fd], NULL); 1063 FD_CLR(fd, fdt->close_on_exec); 1064 __put_unused_fd(files, fd); 1065 spin_unlock(&files->file_lock); 1066 retval = filp_close(filp, files); 1067 1068 /* can't restart close syscall because file table entry was cleared */ 1069 if (unlikely(retval == -ERESTARTSYS || 1070 retval == -ERESTARTNOINTR || 1071 retval == -ERESTARTNOHAND || 1072 retval == -ERESTART_RESTARTBLOCK)) 1073 retval = -EINTR; 1074 1075 return retval; 1076 1077 out_unlock: 1078 spin_unlock(&files->file_lock); 1079 return -EBADF; 1080 } 1081 1082 EXPORT_SYMBOL(sys_close); 1083 1084 /* 1085 * This routine simulates a hangup on the tty, to arrange that users 1086 * are given clean terminals at login time. 1087 */ 1088 asmlinkage long sys_vhangup(void) 1089 { 1090 if (capable(CAP_SYS_TTY_CONFIG)) { 1091 /* XXX: this needs locking */ 1092 tty_vhangup(current->signal->tty); 1093 return 0; 1094 } 1095 return -EPERM; 1096 } 1097 1098 /* 1099 * Called when an inode is about to be open. 1100 * We use this to disallow opening large files on 32bit systems if 1101 * the caller didn't specify O_LARGEFILE. On 64bit systems we force 1102 * on this flag in sys_open. 1103 */ 1104 int generic_file_open(struct inode * inode, struct file * filp) 1105 { 1106 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1107 return -EFBIG; 1108 return 0; 1109 } 1110 1111 EXPORT_SYMBOL(generic_file_open); 1112 1113 /* 1114 * This is used by subsystems that don't want seekable 1115 * file descriptors 1116 */ 1117 int nonseekable_open(struct inode *inode, struct file *filp) 1118 { 1119 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1120 return 0; 1121 } 1122 1123 EXPORT_SYMBOL(nonseekable_open); 1124