/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	spin_unlock(&files->file_lock);
}

static bool get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	bool res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = close_on_exec(fd, fdt);
	rcu_read_unlock();
	return res;
}
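
/*
 * set_close_on_exec()/get_close_on_exec() are the kernel side of the
 * F_SETFD/F_GETFD commands dispatched from do_fcntl() below.  As a
 * hypothetical userspace sketch (illustrative only, not part of this
 * file), the usual read-modify-write pattern looks like:
 *
 *	#include <fcntl.h>
 *
 *	static int set_cloexec(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFD);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 *	}
 */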

SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
{
	int err = -EBADF;
	struct file *file, *tofree;
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	if ((flags & ~O_CLOEXEC) != 0)
		return -EINVAL;

	if (unlikely(oldfd == newfd))
		return -EINVAL;

	spin_lock(&files->file_lock);
	err = expand_files(files, newfd);
	file = fcheck(oldfd);
	if (unlikely(!file))
		goto Ebadf;
	if (unlikely(err < 0)) {
		if (err == -EMFILE)
			goto Ebadf;
		goto out_unlock;
	}
	/*
	 * We need to detect attempts to do dup2() over an allocated but
	 * still not finished descriptor.  NB: OpenBSD avoids that at the
	 * price of extra work in their equivalent of fget() - they insert
	 * struct file immediately after grabbing the descriptor, mark it
	 * larval if more work (e.g. actual opening) is needed and make sure
	 * that fget() treats larval files as absent.  Potentially
	 * interesting, but while the extra work in fget() is trivial, the
	 * locking implications and the amount of surgery on open()-related
	 * paths in VFS are not.  FreeBSD fails with -EBADF in the same
	 * situation, NetBSD's "solution" deadlocks in rather amusing ways,
	 * AFAICS.  All of that is outside the scope of POSIX or SUS, since
	 * neither considers shared descriptor tables and this condition
	 * does not arise without those.
	 */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && fd_is_open(newfd, fdt))
		goto out_unlock;
	get_file(file);
	rcu_assign_pointer(fdt->fd[newfd], file);
	__set_open_fd(newfd, fdt);
	if (flags & O_CLOEXEC)
		__set_close_on_exec(newfd, fdt);
	else
		__clear_close_on_exec(newfd, fdt);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);

	return newfd;

Ebadf:
	err = -EBADF;
out_unlock:
	spin_unlock(&files->file_lock);
	return err;
}

SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
	if (unlikely(newfd == oldfd)) { /* corner case */
		struct files_struct *files = current->files;
		int retval = oldfd;

		rcu_read_lock();
		if (!fcheck_files(files, oldfd))
			retval = -EBADF;
		rcu_read_unlock();
		return retval;
	}
	return sys_dup3(oldfd, newfd, 0);
}

SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
	int ret = -EBADF;
	struct file *file = fget_raw(fildes);

	if (file) {
		ret = get_unused_fd();
		if (ret >= 0)
			fd_install(ret, file);
		else
			fput(file);
	}
	return ret;
}
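
/*
 * Note the differing corner cases above: dup3() rejects oldfd == newfd
 * with -EINVAL, while dup2() just returns newfd when it is a valid
 * descriptor; dup3(..., O_CLOEXEC) sets close-on-exec atomically with
 * the dup, closing the fork/exec race of a separate F_SETFD call.  A
 * hypothetical userspace sketch (illustrative, not part of this file):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	// Redirect stdout to a log file; dup2() closes fd 1 implicitly.
 *	static int redirect_stdout(const char *path)
 *	{
 *		int fd = open(path, O_WRONLY | O_CREAT | O_APPEND, 0644);
 *		if (fd < 0)
 *			return -1;
 *		int ret = dup2(fd, STDOUT_FILENO);
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */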

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
			filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	spin_unlock(&filp->f_lock);

 out:
	return error;
}
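
/*
 * setfl() backs F_SETFL; only the SETFL_MASK bits may change after open,
 * everything else in arg is silently ignored.  A hypothetical userspace
 * sketch of the standard read-modify-write (illustrative only, not part
 * of this file):
 *
 *	#include <fcntl.h>
 *
 *	static int set_nonblock(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFL);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 *	}
 */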

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                     int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, pid, type, force);
	return 0;
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	int result;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_vpid(who);
	result = __f_setown(filp, pid, type, force);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}
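
/*
 * f_setown()/f_getown() use the classic BSD sign encoding: a positive
 * F_SETOWN argument names a process, a negative one a process group
 * (see the XXX note on F_GETOWN in do_fcntl() below for the ambiguity
 * this creates on the read side).  A hypothetical userspace sketch
 * (illustrative, not part of this file):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int claim_sigio(int fd)
 *	{
 *		// -getpgrp() here would target the whole process group
 *		if (fcntl(fd, F_SETOWN, getpid()) < 0)
 *			return -1;
 *		int flags = fcntl(fd, F_GETFL);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFL, flags | O_ASYNC);
 *	}
 */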

static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_MAX;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		ret = __f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	owner.pid = pid_vnr(filp->f_owner.pid);
	switch (filp->f_owner.pid_type) {
	case PIDTYPE_MAX:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_PID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}
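
/*
 * F_SETOWN_EX/F_GETOWN_EX avoid the sign-encoding ambiguity and can also
 * target a single thread (F_OWNER_TID maps to PIDTYPE_MAX above).  A
 * hypothetical userspace sketch (illustrative, not part of this file):
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	static int claim_sigio_this_thread(int fd)
 *	{
 *		struct f_owner_ex owner = {
 *			.type = F_OWNER_TID,
 *			.pid  = syscall(SYS_gettid),	// kernel tid, not pthread_t
 *		};
 *		return fcntl(fd, F_SETOWN_EX, &owner);
 *	}
 */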

#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2];
	int err;

	read_lock(&filp->f_owner.lock);
	src[0] = from_kuid(user_ns, filp->f_owner.uid);
	src[1] = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err  = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		if (arg >= rlimit(RLIMIT_NOFILE))
			break;
		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
		if (err >= 0) {
			get_file(filp);
			fd_install(err, filp);
		}
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}
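
/*
 * do_fcntl() is the command dispatcher shared by fcntl() and fcntl64().
 * Among the less familiar commands, F_SETPIPE_SZ/F_GETPIPE_SZ resize and
 * query a pipe's buffer via pipe_fcntl().  A hypothetical userspace
 * sketch (illustrative, not part of this file):
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	static long grow_pipe(int pipefd, long bytes)
 *	{
 *		if (fcntl(pipefd, F_SETPIPE_SZ, bytes) < 0)
 *			return -1;
 *		return fcntl(pipefd, F_GETPIPE_SZ);	// kernel may have rounded up
 *	}
 */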

static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}
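
/*
 * check_fcntl_cmd() whitelists the only commands usable on O_PATH
 * descriptors (FMODE_PATH, tested in the syscalls below): duplication,
 * FD_CLOEXEC get/set, and the F_GETFL query.  A hypothetical userspace
 * sketch (illustrative, not part of this file):
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int dup_path_fd(const char *path)
 *	{
 *		int fd = open(path, O_PATH | O_CLOEXEC);
 *		if (fd < 0)
 *			return -1;
 *		// F_DUPFD_CLOEXEC is allowed; F_SETLK would fail with EBADF
 *		int dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
 *		close(fd);
 *		return dupfd;
 *	}
 */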

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct file *filp;
	int fput_needed;
	long err = -EBADF;

	filp = fget_raw_light(fd, &fput_needed);
	if (!filp)
		goto out;

	if (unlikely(filp->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(filp, cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, filp);

out1:
	fput_light(filp, fput_needed);
out:
	return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct file *filp;
	long err = -EBADF;
	int fput_needed;

	filp = fget_raw_light(fd, &fput_needed);
	if (!filp)
		goto out;

	if (unlikely(filp->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(filp, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
out1:
	fput_light(filp, fput_needed);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, int group)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = ACCESS_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!do_send_sig_info(signum, &si, p, group))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int group = 1;

	read_lock(&fown->lock);

	type = fown->pid_type;
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band, group);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}
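
/*
 * send_sigio() is the delivery end of O_ASYNC: with F_SETSIG set to a
 * realtime signal, the queued siginfo carries the descriptor and a poll
 * band from band_table above; with the default signum of 0, plain SIGIO
 * is sent and no payload is available.  A hypothetical userspace sketch
 * of consuming that payload (illustrative, not part of this file):
 *
 *	#define _GNU_SOURCE
 *	#include <signal.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static volatile sig_atomic_t last_ready_fd = -1;
 *
 *	static void io_ready(int sig, siginfo_t *si, void *uctx)
 *	{
 *		if (si->si_code == POLL_IN)
 *			last_ready_fd = si->si_fd;	// which fd is readable
 *	}
 *
 *	static int setup_rt_sigio(int fd)
 *	{
 *		struct sigaction sa = {
 *			.sa_sigaction = io_ready,
 *			.sa_flags = SA_SIGINFO,
 *		};
 *		if (sigaction(SIGRTMIN, &sa, NULL) < 0 ||
 *		    fcntl(fd, F_SETOWN, getpid()) < 0 ||
 *		    fcntl(fd, F_SETSIG, SIGRTMIN) < 0)
 *			return -1;
 *		int flags = fcntl(fd, F_GETFL);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFL, flags | O_ASYNC);
 *	}
 */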

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, int group)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int group = 1;
	int ret = 0;

	read_lock(&fown->lock);

	type = fown->pid_type;
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown, group);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(fasync_cache,
			container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		spin_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		spin_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		spin_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		spin_unlock_irq(&fa->fa_lock);
		goto out;
	}

	spin_lock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if it did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (updated) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
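
/*
 * A character driver typically wires fasync_helper() into its
 * file_operations like the following hypothetical sketch (illustrative,
 * not part of this file; the "mydev" names are invented):
 *
 *	#include <linux/fs.h>
 *	#include <linux/module.h>
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 *
 *	static const struct file_operations mydev_fops = {
 *		.owner	= THIS_MODULE,
 *		.fasync	= mydev_fasync,
 *		// .open/.read/.poll etc. omitted for brevity
 *	};
 */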

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		unsigned long flags;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		spin_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		spin_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);
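
/*
 * The notification side of the driver sketch above: when data becomes
 * readable (say, from an interrupt handler), the driver kicks its async
 * readers with ("mydev_async_queue" is the invented queue from above):
 *
 *	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 *
 * The unlocked *fp test above keeps this cheap to call unconditionally
 * when no one has requested async notification.
 */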

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two-bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
		O_RDONLY	| O_WRONLY	| O_RDWR	|
		O_CREAT		| O_EXCL	| O_NOCTTY	|
		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
		__O_SYNC	| O_DSYNC	| FASYNC	|
		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
		__FMODE_EXEC	| O_PATH
		));

	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fcntl_init)