xref: /openbmc/linux/fs/open.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2  *  linux/fs/open.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/utime.h>
10 #include <linux/file.h>
11 #include <linux/smp_lock.h>
12 #include <linux/quotaops.h>
13 #include <linux/fsnotify.h>
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/tty.h>
17 #include <linux/namei.h>
18 #include <linux/backing-dev.h>
19 #include <linux/security.h>
20 #include <linux/mount.h>
21 #include <linux/vfs.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 #include <linux/rcupdate.h>
28 
29 #include <asm/unistd.h>
30 
31 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
32 {
33 	int retval = -ENODEV;
34 
35 	if (sb) {
36 		retval = -ENOSYS;
37 		if (sb->s_op->statfs) {
38 			memset(buf, 0, sizeof(*buf));
39 			retval = security_sb_statfs(sb);
40 			if (retval)
41 				return retval;
42 			retval = sb->s_op->statfs(sb, buf);
43 			if (retval == 0 && buf->f_frsize == 0)
44 				buf->f_frsize = buf->f_bsize;
45 		}
46 	}
47 	return retval;
48 }
49 
50 EXPORT_SYMBOL(vfs_statfs);
51 
52 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
53 {
54 	struct kstatfs st;
55 	int retval;
56 
57 	retval = vfs_statfs(sb, &st);
58 	if (retval)
59 		return retval;
60 
61 	if (sizeof(*buf) == sizeof(st))
62 		memcpy(buf, &st, sizeof(st));
63 	else {
64 		if (sizeof buf->f_blocks == 4) {
65 			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
66 			    0xffffffff00000000ULL)
67 				return -EOVERFLOW;
68 			/*
69 			 * f_files and f_ffree may be -1; it's okay to stuff
70 			 * that into 32 bits
71 			 */
72 			if (st.f_files != -1 &&
73 			    (st.f_files & 0xffffffff00000000ULL))
74 				return -EOVERFLOW;
75 			if (st.f_ffree != -1 &&
76 			    (st.f_ffree & 0xffffffff00000000ULL))
77 				return -EOVERFLOW;
78 		}
79 
80 		buf->f_type = st.f_type;
81 		buf->f_bsize = st.f_bsize;
82 		buf->f_blocks = st.f_blocks;
83 		buf->f_bfree = st.f_bfree;
84 		buf->f_bavail = st.f_bavail;
85 		buf->f_files = st.f_files;
86 		buf->f_ffree = st.f_ffree;
87 		buf->f_fsid = st.f_fsid;
88 		buf->f_namelen = st.f_namelen;
89 		buf->f_frsize = st.f_frsize;
90 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
91 	}
92 	return 0;
93 }
94 
95 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
96 {
97 	struct kstatfs st;
98 	int retval;
99 
100 	retval = vfs_statfs(sb, &st);
101 	if (retval)
102 		return retval;
103 
104 	if (sizeof(*buf) == sizeof(st))
105 		memcpy(buf, &st, sizeof(st));
106 	else {
107 		buf->f_type = st.f_type;
108 		buf->f_bsize = st.f_bsize;
109 		buf->f_blocks = st.f_blocks;
110 		buf->f_bfree = st.f_bfree;
111 		buf->f_bavail = st.f_bavail;
112 		buf->f_files = st.f_files;
113 		buf->f_ffree = st.f_ffree;
114 		buf->f_fsid = st.f_fsid;
115 		buf->f_namelen = st.f_namelen;
116 		buf->f_frsize = st.f_frsize;
117 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
118 	}
119 	return 0;
120 }
121 
122 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
123 {
124 	struct nameidata nd;
125 	int error;
126 
127 	error = user_path_walk(path, &nd);
128 	if (!error) {
129 		struct statfs tmp;
130 		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
131 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
132 			error = -EFAULT;
133 		path_release(&nd);
134 	}
135 	return error;
136 }
137 
138 
139 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
140 {
141 	struct nameidata nd;
142 	long error;
143 
144 	if (sz != sizeof(*buf))
145 		return -EINVAL;
146 	error = user_path_walk(path, &nd);
147 	if (!error) {
148 		struct statfs64 tmp;
149 		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
150 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
151 			error = -EFAULT;
152 		path_release(&nd);
153 	}
154 	return error;
155 }
156 
157 
158 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
159 {
160 	struct file * file;
161 	struct statfs tmp;
162 	int error;
163 
164 	error = -EBADF;
165 	file = fget(fd);
166 	if (!file)
167 		goto out;
168 	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
169 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
170 		error = -EFAULT;
171 	fput(file);
172 out:
173 	return error;
174 }
175 
176 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
177 {
178 	struct file * file;
179 	struct statfs64 tmp;
180 	int error;
181 
182 	if (sz != sizeof(*buf))
183 		return -EINVAL;
184 
185 	error = -EBADF;
186 	file = fget(fd);
187 	if (!file)
188 		goto out;
189 	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
190 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
191 		error = -EFAULT;
192 	fput(file);
193 out:
194 	return error;
195 }
196 
197 int do_truncate(struct dentry *dentry, loff_t length)
198 {
199 	int err;
200 	struct iattr newattrs;
201 
202 	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
203 	if (length < 0)
204 		return -EINVAL;
205 
206 	newattrs.ia_size = length;
207 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
208 
209 	down(&dentry->d_inode->i_sem);
210 	err = notify_change(dentry, &newattrs);
211 	up(&dentry->d_inode->i_sem);
212 	return err;
213 }
214 
215 static inline long do_sys_truncate(const char __user * path, loff_t length)
216 {
217 	struct nameidata nd;
218 	struct inode * inode;
219 	int error;
220 
221 	error = -EINVAL;
222 	if (length < 0)	/* sorry, but loff_t says... */
223 		goto out;
224 
225 	error = user_path_walk(path, &nd);
226 	if (error)
227 		goto out;
228 	inode = nd.dentry->d_inode;
229 
230 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
231 	error = -EISDIR;
232 	if (S_ISDIR(inode->i_mode))
233 		goto dput_and_out;
234 
235 	error = -EINVAL;
236 	if (!S_ISREG(inode->i_mode))
237 		goto dput_and_out;
238 
239 	error = permission(inode,MAY_WRITE,&nd);
240 	if (error)
241 		goto dput_and_out;
242 
243 	error = -EROFS;
244 	if (IS_RDONLY(inode))
245 		goto dput_and_out;
246 
247 	error = -EPERM;
248 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
249 		goto dput_and_out;
250 
251 	/*
252 	 * Make sure that there are no leases.
253 	 */
254 	error = break_lease(inode, FMODE_WRITE);
255 	if (error)
256 		goto dput_and_out;
257 
258 	error = get_write_access(inode);
259 	if (error)
260 		goto dput_and_out;
261 
262 	error = locks_verify_truncate(inode, NULL, length);
263 	if (!error) {
264 		DQUOT_INIT(inode);
265 		error = do_truncate(nd.dentry, length);
266 	}
267 	put_write_access(inode);
268 
269 dput_and_out:
270 	path_release(&nd);
271 out:
272 	return error;
273 }
274 
275 asmlinkage long sys_truncate(const char __user * path, unsigned long length)
276 {
277 	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
278 	return do_sys_truncate(path, (long)length);
279 }
280 
281 static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
282 {
283 	struct inode * inode;
284 	struct dentry *dentry;
285 	struct file * file;
286 	int error;
287 
288 	error = -EINVAL;
289 	if (length < 0)
290 		goto out;
291 	error = -EBADF;
292 	file = fget(fd);
293 	if (!file)
294 		goto out;
295 
296 	/* explicitly opened as large or we are on 64-bit box */
297 	if (file->f_flags & O_LARGEFILE)
298 		small = 0;
299 
300 	dentry = file->f_dentry;
301 	inode = dentry->d_inode;
302 	error = -EINVAL;
303 	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
304 		goto out_putf;
305 
306 	error = -EINVAL;
307 	/* Cannot ftruncate over 2^31 bytes without large file support */
308 	if (small && length > MAX_NON_LFS)
309 		goto out_putf;
310 
311 	error = -EPERM;
312 	if (IS_APPEND(inode))
313 		goto out_putf;
314 
315 	error = locks_verify_truncate(inode, file, length);
316 	if (!error)
317 		error = do_truncate(dentry, length);
318 out_putf:
319 	fput(file);
320 out:
321 	return error;
322 }
323 
324 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
325 {
326 	return do_sys_ftruncate(fd, length, 1);
327 }
328 
/* LFS versions of truncate are only needed on 32 bit machines */
#if BITS_PER_LONG == 32
/* truncate64(2): path-based truncate taking a full loff_t length. */
asmlinkage long sys_truncate64(const char __user * path, loff_t length)
{
	long ret = do_sys_truncate(path, length);

	return ret;
}

/*
 * ftruncate64(2): descriptor-based truncate taking a full loff_t
 * length; the "small" limit of do_sys_ftruncate() is not requested.
 */
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
{
	const int small = 0;

	return do_sys_ftruncate(fd, length, small);
}
#endif
341 
342 #ifdef __ARCH_WANT_SYS_UTIME
343 
344 /*
345  * sys_utime() can be implemented in user-level using sys_utimes().
346  * Is this for backwards compatibility?  If so, why not move it
347  * into the appropriate arch directory (for those architectures that
348  * need it).
349  */
350 
351 /* If times==NULL, set access and modification to current time,
352  * must be owner or have write permission.
353  * Else, update from *times, must be owner or super user.
354  */
355 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
356 {
357 	int error;
358 	struct nameidata nd;
359 	struct inode * inode;
360 	struct iattr newattrs;
361 
362 	error = user_path_walk(filename, &nd);
363 	if (error)
364 		goto out;
365 	inode = nd.dentry->d_inode;
366 
367 	error = -EROFS;
368 	if (IS_RDONLY(inode))
369 		goto dput_and_out;
370 
371 	/* Don't worry, the checks are done in inode_change_ok() */
372 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
373 	if (times) {
374 		error = -EPERM;
375 		if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
376 			goto dput_and_out;
377 
378 		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
379 		newattrs.ia_atime.tv_nsec = 0;
380 		if (!error)
381 			error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
382 		newattrs.ia_mtime.tv_nsec = 0;
383 		if (error)
384 			goto dput_and_out;
385 
386 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
387 	} else {
388                 error = -EACCES;
389                 if (IS_IMMUTABLE(inode))
390                         goto dput_and_out;
391 
392 		if (current->fsuid != inode->i_uid &&
393 		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
394 			goto dput_and_out;
395 	}
396 	down(&inode->i_sem);
397 	error = notify_change(nd.dentry, &newattrs);
398 	up(&inode->i_sem);
399 dput_and_out:
400 	path_release(&nd);
401 out:
402 	return error;
403 }
404 
405 #endif
406 
407 /* If times==NULL, set access and modification to current time,
408  * must be owner or have write permission.
409  * Else, update from *times, must be owner or super user.
410  */
411 long do_utimes(char __user * filename, struct timeval * times)
412 {
413 	int error;
414 	struct nameidata nd;
415 	struct inode * inode;
416 	struct iattr newattrs;
417 
418 	error = user_path_walk(filename, &nd);
419 
420 	if (error)
421 		goto out;
422 	inode = nd.dentry->d_inode;
423 
424 	error = -EROFS;
425 	if (IS_RDONLY(inode))
426 		goto dput_and_out;
427 
428 	/* Don't worry, the checks are done in inode_change_ok() */
429 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
430 	if (times) {
431 		error = -EPERM;
432                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
433                         goto dput_and_out;
434 
435 		newattrs.ia_atime.tv_sec = times[0].tv_sec;
436 		newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
437 		newattrs.ia_mtime.tv_sec = times[1].tv_sec;
438 		newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
439 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
440 	} else {
441 		error = -EACCES;
442                 if (IS_IMMUTABLE(inode))
443                         goto dput_and_out;
444 
445 		if (current->fsuid != inode->i_uid &&
446 		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
447 			goto dput_and_out;
448 	}
449 	down(&inode->i_sem);
450 	error = notify_change(nd.dentry, &newattrs);
451 	up(&inode->i_sem);
452 dput_and_out:
453 	path_release(&nd);
454 out:
455 	return error;
456 }
457 
458 asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
459 {
460 	struct timeval times[2];
461 
462 	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
463 		return -EFAULT;
464 	return do_utimes(filename, utimes ? times : NULL);
465 }
466 
467 
468 /*
469  * access() needs to use the real uid/gid, not the effective uid/gid.
470  * We do this by temporarily clearing all FS-related capabilities and
471  * switching the fsuid/fsgid around to the real ones.
472  */
473 asmlinkage long sys_access(const char __user * filename, int mode)
474 {
475 	struct nameidata nd;
476 	int old_fsuid, old_fsgid;
477 	kernel_cap_t old_cap;
478 	int res;
479 
480 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
481 		return -EINVAL;
482 
483 	old_fsuid = current->fsuid;
484 	old_fsgid = current->fsgid;
485 	old_cap = current->cap_effective;
486 
487 	current->fsuid = current->uid;
488 	current->fsgid = current->gid;
489 
490 	/*
491 	 * Clear the capabilities if we switch to a non-root user
492 	 *
493 	 * FIXME: There is a race here against sys_capset.  The
494 	 * capabilities can change yet we will restore the old
495 	 * value below.  We should hold task_capabilities_lock,
496 	 * but we cannot because user_path_walk can sleep.
497 	 */
498 	if (current->uid)
499 		cap_clear(current->cap_effective);
500 	else
501 		current->cap_effective = current->cap_permitted;
502 
503 	res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
504 	if (!res) {
505 		res = permission(nd.dentry->d_inode, mode, &nd);
506 		/* SuS v2 requires we report a read only fs too */
507 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
508 		   && !special_file(nd.dentry->d_inode->i_mode))
509 			res = -EROFS;
510 		path_release(&nd);
511 	}
512 
513 	current->fsuid = old_fsuid;
514 	current->fsgid = old_fsgid;
515 	current->cap_effective = old_cap;
516 
517 	return res;
518 }
519 
520 asmlinkage long sys_chdir(const char __user * filename)
521 {
522 	struct nameidata nd;
523 	int error;
524 
525 	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
526 	if (error)
527 		goto out;
528 
529 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
530 	if (error)
531 		goto dput_and_out;
532 
533 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
534 
535 dput_and_out:
536 	path_release(&nd);
537 out:
538 	return error;
539 }
540 
541 asmlinkage long sys_fchdir(unsigned int fd)
542 {
543 	struct file *file;
544 	struct dentry *dentry;
545 	struct inode *inode;
546 	struct vfsmount *mnt;
547 	int error;
548 
549 	error = -EBADF;
550 	file = fget(fd);
551 	if (!file)
552 		goto out;
553 
554 	dentry = file->f_dentry;
555 	mnt = file->f_vfsmnt;
556 	inode = dentry->d_inode;
557 
558 	error = -ENOTDIR;
559 	if (!S_ISDIR(inode->i_mode))
560 		goto out_putf;
561 
562 	error = permission(inode, MAY_EXEC, NULL);
563 	if (!error)
564 		set_fs_pwd(current->fs, mnt, dentry);
565 out_putf:
566 	fput(file);
567 out:
568 	return error;
569 }
570 
571 asmlinkage long sys_chroot(const char __user * filename)
572 {
573 	struct nameidata nd;
574 	int error;
575 
576 	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
577 	if (error)
578 		goto out;
579 
580 	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
581 	if (error)
582 		goto dput_and_out;
583 
584 	error = -EPERM;
585 	if (!capable(CAP_SYS_CHROOT))
586 		goto dput_and_out;
587 
588 	set_fs_root(current->fs, nd.mnt, nd.dentry);
589 	set_fs_altroot();
590 	error = 0;
591 dput_and_out:
592 	path_release(&nd);
593 out:
594 	return error;
595 }
596 
597 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
598 {
599 	struct inode * inode;
600 	struct dentry * dentry;
601 	struct file * file;
602 	int err = -EBADF;
603 	struct iattr newattrs;
604 
605 	file = fget(fd);
606 	if (!file)
607 		goto out;
608 
609 	dentry = file->f_dentry;
610 	inode = dentry->d_inode;
611 
612 	err = -EROFS;
613 	if (IS_RDONLY(inode))
614 		goto out_putf;
615 	err = -EPERM;
616 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
617 		goto out_putf;
618 	down(&inode->i_sem);
619 	if (mode == (mode_t) -1)
620 		mode = inode->i_mode;
621 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
622 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
623 	err = notify_change(dentry, &newattrs);
624 	up(&inode->i_sem);
625 
626 out_putf:
627 	fput(file);
628 out:
629 	return err;
630 }
631 
632 asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
633 {
634 	struct nameidata nd;
635 	struct inode * inode;
636 	int error;
637 	struct iattr newattrs;
638 
639 	error = user_path_walk(filename, &nd);
640 	if (error)
641 		goto out;
642 	inode = nd.dentry->d_inode;
643 
644 	error = -EROFS;
645 	if (IS_RDONLY(inode))
646 		goto dput_and_out;
647 
648 	error = -EPERM;
649 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
650 		goto dput_and_out;
651 
652 	down(&inode->i_sem);
653 	if (mode == (mode_t) -1)
654 		mode = inode->i_mode;
655 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
656 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
657 	error = notify_change(nd.dentry, &newattrs);
658 	up(&inode->i_sem);
659 
660 dput_and_out:
661 	path_release(&nd);
662 out:
663 	return error;
664 }
665 
666 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
667 {
668 	struct inode * inode;
669 	int error;
670 	struct iattr newattrs;
671 
672 	error = -ENOENT;
673 	if (!(inode = dentry->d_inode)) {
674 		printk(KERN_ERR "chown_common: NULL inode\n");
675 		goto out;
676 	}
677 	error = -EROFS;
678 	if (IS_RDONLY(inode))
679 		goto out;
680 	error = -EPERM;
681 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
682 		goto out;
683 	newattrs.ia_valid =  ATTR_CTIME;
684 	if (user != (uid_t) -1) {
685 		newattrs.ia_valid |= ATTR_UID;
686 		newattrs.ia_uid = user;
687 	}
688 	if (group != (gid_t) -1) {
689 		newattrs.ia_valid |= ATTR_GID;
690 		newattrs.ia_gid = group;
691 	}
692 	if (!S_ISDIR(inode->i_mode))
693 		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
694 	down(&inode->i_sem);
695 	error = notify_change(dentry, &newattrs);
696 	up(&inode->i_sem);
697 out:
698 	return error;
699 }
700 
701 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
702 {
703 	struct nameidata nd;
704 	int error;
705 
706 	error = user_path_walk(filename, &nd);
707 	if (!error) {
708 		error = chown_common(nd.dentry, user, group);
709 		path_release(&nd);
710 	}
711 	return error;
712 }
713 
714 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
715 {
716 	struct nameidata nd;
717 	int error;
718 
719 	error = user_path_walk_link(filename, &nd);
720 	if (!error) {
721 		error = chown_common(nd.dentry, user, group);
722 		path_release(&nd);
723 	}
724 	return error;
725 }
726 
727 
728 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
729 {
730 	struct file * file;
731 	int error = -EBADF;
732 
733 	file = fget(fd);
734 	if (file) {
735 		error = chown_common(file->f_dentry, user, group);
736 		fput(file);
737 	}
738 	return error;
739 }
740 
/*
 * Finish setting up the already allocated file @f so that it refers to
 * @dentry on @mnt, opened with @flags.
 *
 * @open, when non-NULL, is called in place of the inode's f_op->open
 * method.
 *
 * Returns @f on success or an ERR_PTR() value.  On the cleanup_all and
 * cleanup_file paths the file is released with put_filp() and the
 * @dentry and @mnt references passed in are dropped with dput() and
 * mntput().  The late O_DIRECT failure only calls fput(f);
 * NOTE(review): presumably fput() releases the dentry/mnt now stored
 * in @f - confirm against fput().
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
					int flags, struct file *f,
					int (*open)(struct inode *, struct file *))
{
	struct inode *inode;
	int error;

	f->f_flags = flags;
	/* Adding 1 turns the open(2) access mode into FMODE_* style bits. */
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);
		if (error)
			goto cleanup_file;
	}

	f->f_mapping = inode->i_mapping;
	f->f_dentry = dentry;
	f->f_vfsmnt = mnt;
	f->f_pos = 0;
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);

	/* Fall back to the filesystem's own open method if none was given. */
	if (!open && f->f_op)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}

	/* These flags only matter while opening; strip them from the file. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
		/* O_DIRECT needs either direct_IO or get_xip_page support. */
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
		    (!f->f_mapping->a_ops->get_xip_page))) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

cleanup_all:
	/* Undo everything done after get_write_access(), in reverse. */
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(f);
	/* Detach dentry/mnt from the file; they are dropped explicitly below. */
	f->f_dentry = NULL;
	f->f_vfsmnt = NULL;
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}
802 
803 /*
804  * Note that while the flag value (low two bits) for sys_open means:
805  *	00 - read-only
806  *	01 - write-only
807  *	10 - read-write
808  *	11 - special
809  * it is changed into
810  *	00 - no permissions needed
811  *	01 - read-permission
812  *	10 - write-permission
813  *	11 - read-write
814  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
815  * used by symlinks.
816  */
817 struct file *filp_open(const char * filename, int flags, int mode)
818 {
819 	int namei_flags, error;
820 	struct nameidata nd;
821 
822 	namei_flags = flags;
823 	if ((namei_flags+1) & O_ACCMODE)
824 		namei_flags++;
825 
826 	error = open_namei(filename, namei_flags, mode, &nd);
827 	if (!error)
828 		return nameidata_to_filp(&nd, flags);
829 
830 	return ERR_PTR(error);
831 }
832 EXPORT_SYMBOL(filp_open);
833 
834 /**
835  * lookup_instantiate_filp - instantiates the open intent filp
836  * @nd: pointer to nameidata
837  * @dentry: pointer to dentry
838  * @open: open callback
839  *
840  * Helper for filesystems that want to use lookup open intents and pass back
841  * a fully instantiated struct file to the caller.
842  * This function is meant to be called from within a filesystem's
843  * lookup method.
844  * Note that in case of error, nd->intent.open.file is destroyed, but the
845  * path information remains valid.
846  * If the open callback is set to NULL, then the standard f_op->open()
847  * filesystem callback is substituted.
848  */
849 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
850 		int (*open)(struct inode *, struct file *))
851 {
852 	if (IS_ERR(nd->intent.open.file))
853 		goto out;
854 	if (IS_ERR(dentry))
855 		goto out_err;
856 	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
857 					     nd->intent.open.flags - 1,
858 					     nd->intent.open.file,
859 					     open);
860 out:
861 	return nd->intent.open.file;
862 out_err:
863 	release_open_intent(nd);
864 	nd->intent.open.file = (struct file *)dentry;
865 	goto out;
866 }
867 EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
868 
869 /**
870  * nameidata_to_filp - convert a nameidata to an open filp.
871  * @nd: pointer to nameidata
872  * @flags: open flags
873  *
874  * Note that this function destroys the original nameidata
875  */
876 struct file *nameidata_to_filp(struct nameidata *nd, int flags)
877 {
878 	struct file *filp;
879 
880 	/* Pick up the filp from the open intent */
881 	filp = nd->intent.open.file;
882 	/* Has the filesystem initialised the file for us? */
883 	if (filp->f_dentry == NULL)
884 		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
885 	else
886 		path_release(nd);
887 	return filp;
888 }
889 
890 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
891 {
892 	int error;
893 	struct file *f;
894 
895 	error = -ENFILE;
896 	f = get_empty_filp();
897 	if (f == NULL)
898 		return ERR_PTR(error);
899 
900 	return __dentry_open(dentry, mnt, flags, f, NULL);
901 }
902 EXPORT_SYMBOL(dentry_open);
903 
904 /*
905  * Find an empty file descriptor entry, and mark it busy.
906  */
907 int get_unused_fd(void)
908 {
909 	struct files_struct * files = current->files;
910 	int fd, error;
911 	struct fdtable *fdt;
912 
913   	error = -EMFILE;
914 	spin_lock(&files->file_lock);
915 
916 repeat:
917 	fdt = files_fdtable(files);
918  	fd = find_next_zero_bit(fdt->open_fds->fds_bits,
919 				fdt->max_fdset,
920 				fdt->next_fd);
921 
922 	/*
923 	 * N.B. For clone tasks sharing a files structure, this test
924 	 * will limit the total number of files that can be opened.
925 	 */
926 	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
927 		goto out;
928 
929 	/* Do we need to expand the fd array or fd set?  */
930 	error = expand_files(files, fd);
931 	if (error < 0)
932 		goto out;
933 
934 	if (error) {
935 		/*
936 	 	 * If we needed to expand the fs array we
937 		 * might have blocked - try again.
938 		 */
939 		error = -EMFILE;
940 		goto repeat;
941 	}
942 
943 	FD_SET(fd, fdt->open_fds);
944 	FD_CLR(fd, fdt->close_on_exec);
945 	fdt->next_fd = fd + 1;
946 #if 1
947 	/* Sanity check */
948 	if (fdt->fd[fd] != NULL) {
949 		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
950 		fdt->fd[fd] = NULL;
951 	}
952 #endif
953 	error = fd;
954 
955 out:
956 	spin_unlock(&files->file_lock);
957 	return error;
958 }
959 
960 EXPORT_SYMBOL(get_unused_fd);
961 
962 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
963 {
964 	struct fdtable *fdt = files_fdtable(files);
965 	__FD_CLR(fd, fdt->open_fds);
966 	if (fd < fdt->next_fd)
967 		fdt->next_fd = fd;
968 }
969 
970 void fastcall put_unused_fd(unsigned int fd)
971 {
972 	struct files_struct *files = current->files;
973 	spin_lock(&files->file_lock);
974 	__put_unused_fd(files, fd);
975 	spin_unlock(&files->file_lock);
976 }
977 
978 EXPORT_SYMBOL(put_unused_fd);
979 
980 /*
981  * Install a file pointer in the fd array.
982  *
983  * The VFS is full of places where we drop the files lock between
984  * setting the open_fds bitmap and installing the file in the file
985  * array.  At any such point, we are vulnerable to a dup2() race
986  * installing a file in the array before us.  We need to detect this and
987  * fput() the struct file we are about to overwrite in this case.
988  *
989  * It should never happen - if we allow dup2() do it, _really_ bad things
990  * will follow.
991  */
992 
993 void fastcall fd_install(unsigned int fd, struct file * file)
994 {
995 	struct files_struct *files = current->files;
996 	struct fdtable *fdt;
997 	spin_lock(&files->file_lock);
998 	fdt = files_fdtable(files);
999 	BUG_ON(fdt->fd[fd] != NULL);
1000 	rcu_assign_pointer(fdt->fd[fd], file);
1001 	spin_unlock(&files->file_lock);
1002 }
1003 
1004 EXPORT_SYMBOL(fd_install);
1005 
1006 long do_sys_open(const char __user *filename, int flags, int mode)
1007 {
1008 	char *tmp = getname(filename);
1009 	int fd = PTR_ERR(tmp);
1010 
1011 	if (!IS_ERR(tmp)) {
1012 		fd = get_unused_fd();
1013 		if (fd >= 0) {
1014 			struct file *f = filp_open(tmp, flags, mode);
1015 			if (IS_ERR(f)) {
1016 				put_unused_fd(fd);
1017 				fd = PTR_ERR(f);
1018 			} else {
1019 				fsnotify_open(f->f_dentry);
1020 				fd_install(fd, f);
1021 			}
1022 		}
1023 		putname(tmp);
1024 	}
1025 	return fd;
1026 }
1027 
1028 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1029 {
1030 	if (force_o_largefile())
1031 		flags |= O_LARGEFILE;
1032 
1033 	return do_sys_open(filename, flags, mode);
1034 }
1035 EXPORT_SYMBOL_GPL(sys_open);
1036 
1037 #ifndef __alpha__
1038 
1039 /*
1040  * For backward compatibility?  Maybe this should be moved
1041  * into arch/i386 instead?
1042  */
1043 asmlinkage long sys_creat(const char __user * pathname, int mode)
1044 {
1045 	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
1046 }
1047 
1048 #endif
1049 
1050 /*
1051  * "id" is the POSIX thread ID. We use the
1052  * files pointer for this..
1053  */
1054 int filp_close(struct file *filp, fl_owner_t id)
1055 {
1056 	int retval = 0;
1057 
1058 	if (!file_count(filp)) {
1059 		printk(KERN_ERR "VFS: Close: file count is 0\n");
1060 		return 0;
1061 	}
1062 
1063 	if (filp->f_op && filp->f_op->flush)
1064 		retval = filp->f_op->flush(filp);
1065 
1066 	dnotify_flush(filp, id);
1067 	locks_remove_posix(filp, id);
1068 	fput(filp);
1069 	return retval;
1070 }
1071 
1072 EXPORT_SYMBOL(filp_close);
1073 
1074 /*
1075  * Careful here! We test whether the file pointer is NULL before
1076  * releasing the fd. This ensures that one clone task can't release
1077  * an fd while another clone is opening it.
1078  */
1079 asmlinkage long sys_close(unsigned int fd)
1080 {
1081 	struct file * filp;
1082 	struct files_struct *files = current->files;
1083 	struct fdtable *fdt;
1084 
1085 	spin_lock(&files->file_lock);
1086 	fdt = files_fdtable(files);
1087 	if (fd >= fdt->max_fds)
1088 		goto out_unlock;
1089 	filp = fdt->fd[fd];
1090 	if (!filp)
1091 		goto out_unlock;
1092 	rcu_assign_pointer(fdt->fd[fd], NULL);
1093 	FD_CLR(fd, fdt->close_on_exec);
1094 	__put_unused_fd(files, fd);
1095 	spin_unlock(&files->file_lock);
1096 	return filp_close(filp, files);
1097 
1098 out_unlock:
1099 	spin_unlock(&files->file_lock);
1100 	return -EBADF;
1101 }
1102 
1103 EXPORT_SYMBOL(sys_close);
1104 
1105 /*
1106  * This routine simulates a hangup on the tty, to arrange that users
1107  * are given clean terminals at login time.
1108  */
1109 asmlinkage long sys_vhangup(void)
1110 {
1111 	if (capable(CAP_SYS_TTY_CONFIG)) {
1112 		tty_vhangup(current->signal->tty);
1113 		return 0;
1114 	}
1115 	return -EPERM;
1116 }
1117 
1118 /*
1119  * Called when an inode is about to be open.
1120  * We use this to disallow opening large files on 32bit systems if
1121  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1122  * on this flag in sys_open.
1123  */
1124 int generic_file_open(struct inode * inode, struct file * filp)
1125 {
1126 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1127 		return -EFBIG;
1128 	return 0;
1129 }
1130 
1131 EXPORT_SYMBOL(generic_file_open);
1132 
1133 /*
1134  * This is used by subsystems that don't want seekable
1135  * file descriptors
1136  */
1137 int nonseekable_open(struct inode *inode, struct file *filp)
1138 {
1139 	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1140 	return 0;
1141 }
1142 
1143 EXPORT_SYMBOL(nonseekable_open);
1144