/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

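/*
 * Set or clear the close-on-exec bit for @fd in the current process's
 * file descriptor table.  The bit lives in the fdtable's close_on_exec
 * bitmap, so updates are serialized by files->file_lock.
 */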
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static inline int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  Must be called with the
 * file_lock held for write.
 */

static int locate_fd(struct files_struct *files,
			    struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..fdt->next_fd
	 */
	start = orig_start;
	if (start < fdt->next_fd)
		start = fdt->next_fd;

	newfd = start;
	if (start < fdt->max_fdset) {
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
			fdt->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/*
	 * We reacquired file_lock, so we are safe as long as we
	 * reacquire the fdtable pointer and use it while holding
	 * the lock; no one can free it during that time.
	 */
	fdt = files_fdtable(files);
	if (start <= fdt->next_fd)
		fdt->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}

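/*
 * Install @file at the lowest free descriptor at or above @start.
 * Consumes the caller's reference to @file: on failure the file is
 * fput() and a negative error is returned, otherwise the new fd.
 */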
static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct * files = current->files;
	struct fdtable *fdt;
	int fd;

	spin_lock(&files->file_lock);
	fd = locate_fd(files, file, start);
	if (fd >= 0) {
		/* locate_fd() may have expanded fdtable, load the ptr */
		fdt = files_fdtable(files);
		FD_SET(fd, fdt->open_fds);
		FD_CLR(fd, fdt->close_on_exec);
		spin_unlock(&files->file_lock);
		fd_install(fd, file);
	} else {
		spin_unlock(&files->file_lock);
		fput(file);
	}

	return fd;
}

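/*
 * Duplicate oldfd onto newfd, first closing any file already installed
 * at newfd.  dup2() to the same descriptor just returns it (if valid),
 * and, as with dup(), the new descriptor's close-on-exec flag is
 * cleared.
 */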
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file * file, *tofree;
	struct files_struct * files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file * file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}

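/*
 * The flags that F_SETFL may change.  setfl() below preserves every
 * other bit of f_flags, notably the access mode and creation flags.
 */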
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = filp->f_dentry->d_inode;
	int error = 0;

	/* O_APPEND cannot be cleared if the file is marked as append-only */
	if (!(arg & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 out:
	unlock_kernel();
	return error;
}

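/*
 * Record the owner (pid or -pgrp) and credentials to use when raising
 * SIGIO/SIGURG for this file.  When @force is zero an already
 * established owner is left untouched.
 */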
static void f_modown(struct file *filp, unsigned long pid,
                     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		filp->f_owner.pid = pid;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int f_setown(struct file *filp, unsigned long arg, int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, arg, current->uid, current->euid, force);
	return 0;
}

EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, 0, 0, 0, 1);
}

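/*
 * Dispatch the generic fcntl commands.  Called with a reference held on
 * @filp; unknown commands fall through and return the initial -EINVAL.
 */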
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		get_file(filp);
		err = dupfd(filp, arg);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = filp->f_owner.pid;
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

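/*
 * On 32-bit architectures fcntl64() additionally understands the 64-bit
 * file locking commands, which take a struct flock64 with large file
 * offsets; everything else is handed to do_fcntl() unchanged.
 */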
#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
		case F_GETLK64:
			err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
			break;
		case F_SETLK64:
		case F_SETLKW64:
			err = fcntl_setlk64(fd, filp, cmd,
					(struct flock64 __user *) arg);
			break;
		default:
			err = do_fcntl(fd, cmd, arg, filp);
			break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

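/*
 * SIGIO delivery policy: the owner's uid or euid must be root or match
 * the target task's uid or saved uid, and the security module must not
 * object.
 */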
static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			if ((reason & __SI_MASK) != __SI_POLL)
				BUG();
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!send_group_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

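/*
 * Deliver SIGIO (or the signal selected with F_SETSIG) on behalf of
 * @fown.  A positive pid names a single task; a negative pid means
 * every task in the process group -pid.
 */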
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	int pid;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigio_to_task(p, fown, fd, band);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigio_to_task(p, fown, fd, band);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	int pid, ret = 0;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigurg_to_task(p, fown);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigurg_to_task(p, fown);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static kmem_cache_t *fasync_cache;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns negative on error, 0 if it
 * made no changes and positive if it added/deleted the entry.
 */
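/*
 * A minimal usage sketch (hypothetical driver, not part of this file;
 * the names mydev_async_queue and mydev_fasync are made up for
 * illustration).  A driver keeps a fasync list head, wires
 * fasync_helper() into its fops->fasync hook, and calls kill_fasync()
 * when new data arrives:
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 *
 *	// on data arrival:
 *	//	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 */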
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);

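/*
 * Walk a fasync list and raise the owner's signal for every entry.  The
 * caller must keep the list stable, typically by holding fasync_lock
 * for reading as kill_fasync() below does.
 */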
void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);

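/*
 * Create the slab cache that fasync_helper() allocates entries from.
 * SLAB_PANIC means a failure here panics at boot rather than leaving a
 * NULL cache behind.
 */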
static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
	return 0;
}

module_init(fasync_init)