/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void fastcall set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}
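
/*
 * Illustrative userspace sketch (not part of this file): these two
 * helpers back the F_GETFD/F_SETFD fcntl commands handled in
 * do_fcntl() below.  A program typically uses them to mark a
 * descriptor close-on-exec:
 *
 *	int flags = fcntl(fd, F_GETFD);
 *	if (flags != -1)
 *		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 */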

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  Must be called with the
 * file_lock held.
 */

static int locate_fd(struct files_struct *files,
			    struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < fdt->max_fds)
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
					   fdt->max_fds, start);

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/*
	 * We reacquired files->file_lock, so we are safe as long as
	 * we reacquire the fdtable pointer and use it while holding
	 * the lock; no one can free it during that time.
	 */
	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}

static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int fd;

	spin_lock(&files->file_lock);
	fd = locate_fd(files, file, start);
	if (fd >= 0) {
		/* locate_fd() may have expanded fdtable, load the ptr */
		fdt = files_fdtable(files);
		FD_SET(fd, fdt->open_fds);
		FD_CLR(fd, fdt->close_on_exec);
		spin_unlock(&files->file_lock);
		fd_install(fd, file);
	} else {
		spin_unlock(&files->file_lock);
		fput(file);
	}

	return fd;
}
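
/*
 * Illustrative userspace sketch: dupfd() implements dup() and the
 * fcntl(F_DUPFD) command, which duplicates a descriptor onto the
 * lowest free slot at or above the caller's hint, e.g. to keep the
 * copy clear of the stdio descriptors:
 *
 *	int newfd = fcntl(oldfd, F_DUPFD, 10);	(lowest free fd >= 10)
 */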

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file *file, *tofree;
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}
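
/*
 * Illustrative userspace sketch: the classic use of dup2() is
 * redirecting a standard descriptor, e.g. pointing stdout at a log
 * file before exec():
 *
 *	int logfd = open("log.txt", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *	if (logfd >= 0 && dup2(logfd, STDOUT_FILENO) >= 0)
 *		close(logfd);
 */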

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file *file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
	unlock_kernel();
	return error;
}
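
/*
 * Illustrative userspace sketch: setfl() is reached via
 * fcntl(F_SETFL).  The usual read-modify-write pattern for enabling
 * non-blocking I/O on an already-open descriptor is:
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags != -1)
 *		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */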

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, pid, type, current->uid, current->euid, force);
	return 0;
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	int result;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_pid(who);
	result = __f_setown(filp, pid, type, force);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_nr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}
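
/*
 * Illustrative userspace sketch: the owner recorded above is the
 * target of SIGIO/SIGURG.  A typical sequence arming signal-driven
 * I/O on a descriptor is:
 *
 *	fcntl(fd, F_SETOWN, getpid());		(deliver signals to us)
 *	int flags = fcntl(fd, F_GETFL);
 *	fcntl(fd, F_SETFL, flags | O_ASYNC);	(enable SIGIO delivery)
 *
 * A negative F_SETOWN argument selects a process group, which is why
 * f_getown() returns a negated pid for PIDTYPE_PGID owners.
 */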

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		get_file(filp);
		err = dupfd(filp, arg);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}
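
/*
 * Illustrative userspace sketch: F_SETSIG replaces plain SIGIO with a
 * queued realtime signal whose siginfo identifies the source, so a
 * single handler can service many descriptors:
 *
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 *
 * In a sigaction(SA_SIGINFO) handler, info->si_fd is then the
 * descriptor and info->si_band the poll events (see band_table below).
 */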

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!group_send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int ret = 0;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns a negative value on error,
 * 0 if it made no changes, and a positive value if it added or deleted
 * the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);
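
/*
 * Minimal driver-side sketch (hypothetical names, not from this tree):
 * a character driver keeps a fasync list head and forwards its fasync
 * file operation to fasync_helper(), which maintains the list when
 * userspace toggles FASYNC via fcntl(F_SETFL):
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 */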

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);
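
/*
 * Continuing the driver sketch above: when new data arrives (for
 * instance in the driver's interrupt handler), all registered
 * listeners are notified with:
 *
 *	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 *
 * which walks the list and ends up in send_sigio() for each entry.
 */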

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
	return 0;
}

module_init(fasync_init)