xref: /openbmc/linux/fs/fcntl.c (revision f15cbe6f1a4b4d9df59142fc8e4abb973302cf44)
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}
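
/*
 * Usage sketch (userspace, not part of this file; set_cloexec() is a
 * hypothetical helper): the two functions above back the F_GETFD and
 * F_SETFD commands dispatched from do_fcntl() below, so the bit they
 * manage is the one userspace toggles like this:
 *
 *	#include <fcntl.h>
 *
 *	int set_cloexec(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFD);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 *	}
 */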

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  It takes and releases
 * files->file_lock itself, so the caller must not hold it.
 */

static int locate_fd(unsigned int orig_start, int cloexec)
{
	struct files_struct *files = current->files;
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < fdt->max_fds)
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
					   fdt->max_fds, start);

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	FD_SET(newfd, fdt->open_fds);
	if (cloexec)
		FD_SET(newfd, fdt->close_on_exec);
	else
		FD_CLR(newfd, fdt->close_on_exec);
	error = newfd;

out:
	spin_unlock(&files->file_lock);
	return error;
}
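
/*
 * Illustration (simplified sketch, not the real implementation):
 * find_next_zero_bit() above behaves like a linear scan for the first
 * clear bit at or after 'start':
 *
 *	unsigned int first_zero_bit(const unsigned long *map,
 *				    unsigned int size, unsigned int start)
 *	{
 *		unsigned int i;
 *
 *		for (i = start; i < size; i++)
 *			if (!test_bit(i, map))
 *				return i;
 *		return size;	// no free slot below 'size'
 *	}
 *
 * The real helper works a word at a time; files->next_fd merely caches
 * a lower bound on the first free fd so that repeated allocations need
 * not rescan from zero.
 */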

static int dupfd(struct file *file, unsigned int start, int cloexec)
{
	int fd = locate_fd(start, cloexec);
	if (fd >= 0)
		fd_install(fd, file);
	else
		fput(file);

	return fd;
}

asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
	int err = -EBADF;
	struct file * file, *tofree;
	struct files_struct * files = current->files;
	struct fdtable *fdt;

	if ((flags & ~O_CLOEXEC) != 0)
		return -EINVAL;

	if (unlikely(oldfd == newfd))
		return -EINVAL;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (unlikely(err < 0)) {
		if (err == -EMFILE)
			err = -EBADF;
		goto out_fput;
	}

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	if (flags & O_CLOEXEC)
		FD_SET(newfd, fdt->close_on_exec);
	else
		FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	if (unlikely(newfd == oldfd)) { /* corner case */
		struct files_struct *files = current->files;
		rcu_read_lock();
		if (!fcheck_files(files, oldfd))
			oldfd = -EBADF;
		rcu_read_unlock();
		return oldfd;
	}
	return sys_dup3(oldfd, newfd, 0);
}

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file * file = fget(fildes);

	if (file)
		ret = dupfd(file, 0, 0);
	return ret;
}
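
/*
 * Example (userspace sketch): the three entry points above implement
 * dup(), dup2() and dup3(). A classic use is redirecting stdout, and
 * dup3() additionally lets the copy start life close-on-exec:
 *
 *	#define _GNU_SOURCE		// for dup3()
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int log_fd = open("cmd.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	dup2(log_fd, STDOUT_FILENO);		// fd 1 now writes cmd.log
 *	int c = dup3(log_fd, 42, O_CLOEXEC);	// fd 42 dies on execve()
 *
 * Note the corner case handled in sys_dup2(): dup2(fd, fd) returns fd
 * if it is open, without touching the descriptor table, whereas
 * dup3(fd, fd, 0) fails with EINVAL.
 */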

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = filp->f_path.dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!is_owner_or_cap(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
	return error;
}
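
/*
 * Example (userspace sketch; set_nonblock() is a hypothetical helper):
 * setfl() is reached via fcntl(F_SETFL) and only the SETFL_MASK bits
 * can be changed after open. The read-modify-write idiom looks like:
 *
 *	#include <fcntl.h>
 *
 *	int set_nonblock(int fd, int on)
 *	{
 *		int flags = fcntl(fd, F_GETFL);
 *		if (flags < 0)
 *			return -1;
 *		if (on)
 *			flags |= O_NONBLOCK;
 *		else
 *			flags &= ~O_NONBLOCK;
 *		return fcntl(fd, F_SETFL, flags);
 *	}
 */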

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, pid, type, current->uid, current->euid, force);
	return 0;
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	int result;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_vpid(who);
	result = __f_setown(filp, pid, type, force);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}
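
/*
 * Example (userspace sketch): F_SETOWN/F_GETOWN use the sign of the
 * value to distinguish a process from a process group, matching the
 * PIDTYPE_PID/PIDTYPE_PGID split in f_setown() above:
 *
 *	fcntl(fd, F_SETOWN, getpid());		// SIGIO to this process
 *	fcntl(fd, F_SETOWN, -getpgrp());	// SIGIO to the whole group
 *
 *	pid_t owner = fcntl(fd, F_GETOWN);	// negative => process group
 *
 * As the XXX comment in do_fcntl() below notes, a negative group value
 * can collide with the syscall error-return range on the way back to
 * userspace.
 */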

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
			break;
		get_file(filp);
		err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif
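
/*
 * Note (userspace sketch; behaviour assumed from the usual glibc
 * convention): on 32-bit platforms, building with -D_FILE_OFFSET_BITS=64
 * makes the C library route locking requests through fcntl64() and the
 * *LK64 commands above, so an ordinary-looking lock covers 64-bit
 * offsets:
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,		// 0 means "to end of file"
 *	};
 *	fcntl(fd, F_SETLKW, &fl);	// blocks until the lock is held
 */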

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue an rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!group_send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}
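
/*
 * Example (userspace sketch; io_handler() and handle_readable() are
 * hypothetical): when F_SETSIG selects a realtime signal, the path
 * above queues it with si_fd and si_band filled in, so a handler can
 * tell which descriptor became ready and why:
 *
 *	void io_handler(int sig, siginfo_t *si, void *ctx)
 *	{
 *		if (si->si_code == POLL_IN)
 *			handle_readable(si->si_fd);
 *	}
 *
 *	struct sigaction sa = { .sa_sigaction = io_handler,
 *				.sa_flags = SA_SIGINFO };
 *	sigaction(SIGRTMIN, &sa, NULL);
 *	fcntl(fd, F_SETOWN, getpid());
 *	fcntl(fd, F_SETSIG, SIGRTMIN);
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 *
 * With F_SETSIG left at 0, the "case 0" fallback above sends a plain
 * SIGIO carrying no extra information.
 */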

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int ret = 0;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}
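
/*
 * Example (userspace sketch; urg_handler() is hypothetical): SIGURG is
 * only delivered to processes that have claimed the socket via
 * F_SETOWN, which is what fown->pid being set encodes above:
 *
 *	signal(SIGURG, urg_handler);
 *	fcntl(sock, F_SETOWN, getpid());	// claim ownership first
 *	...
 *	recv(sock, &oob, 1, MSG_OOB);		// fetch the urgent byte
 */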

static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue. It returns negative on error, 0 if it made
 * no change, and positive if it added or deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);
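
/*
 * Example (driver-side sketch; all mydev_* names are hypothetical): a
 * character driver typically wires fasync_helper() straight into its
 * fasync method and keeps one list head per device:
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 *
 *	static const struct file_operations mydev_fops = {
 *		.fasync	= mydev_fasync,
 *		...
 *	};
 *
 * The release method should also call mydev_fasync(-1, filp, 0) so the
 * entry is removed when the file is closed.
 */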

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);
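
/*
 * Example (driver-side sketch, continuing the hypothetical mydev code
 * above): when new data arrives, the driver kicks every registered
 * owner; each entry ends up in send_sigio() via __kill_fasync():
 *
 *	static void mydev_data_ready(void)
 *	{
 *		kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 *	}
 *
 * POLL_OUT is the band to use when the device becomes writable again.
 */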

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fasync_init)