// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/*
	 * Pipe packetized mode is controlled by the O_DIRECT flag, so
	 * FIFOs are exempt from the direct_IO requirement below.
	 */
	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	spin_unlock(&filp->f_lock);

 out:
	return error;
}
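
/*
 * Illustrative userspace sketch (not part of this file): how setfl() is
 * reached via fcntl(F_SETFL).  Only the bits in SETFL_MASK can change;
 * access-mode and creation flags in the argument are silently ignored.
 */
#if 0
#include <fcntl.h>

static int set_nonblocking(int fd)
{
	int flags = fcntl(fd, F_GETFL);	/* read current status flags */

	if (flags < 0)
		return -1;
	/* setfl() merges this with the bits outside SETFL_MASK */
	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
#endif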

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
	}
	write_unlock_irq(&filp->f_owner.lock);
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	security_file_set_fowner(filp);
	f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid = NULL;
	int who = arg, ret = 0;

	type = PIDTYPE_TGID;
	if (who < 0) {
		/* avoid overflow below */
		if (who == INT_MIN)
			return -EINVAL;

		type = PIDTYPE_PGID;
		who = -who;
	}

	rcu_read_lock();
	if (who) {
		pid = find_vpid(who);
		if (!pid)
			ret = -ESRCH;
	}

	if (!ret)
		__f_setown(filp, pid, type, force);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_TGID, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid = 0;
	read_lock(&filp->f_owner.lock);
	rcu_read_lock();
	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
		pid = pid_vnr(filp->f_owner.pid);
		if (filp->f_owner.pid_type == PIDTYPE_PGID)
			pid = -pid;
	}
	rcu_read_unlock();
	read_unlock(&filp->f_owner.lock);
	return pid;
}
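
/*
 * Illustrative userspace sketch (not part of this file): the classic
 * SIGIO setup that f_setown() and setfl() implement.  The process
 * registers itself as owner, then enables O_ASYNC (FASYNC) so
 * kill_fasync() below can signal it when the fd becomes ready.
 * Assumes _GNU_SOURCE for O_ASYNC on older toolchains.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static void enable_sigio(int fd)
{
	/* deliver to this process; a negative pid would mean a pgrp */
	fcntl(fd, F_SETOWN, getpid());
	/* the FASYNC transition invokes ->fasync(), see setfl() above */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
}
#endif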

static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_TGID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner = {};
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	rcu_read_lock();
	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
		owner.pid = pid_vnr(filp->f_owner.pid);
	rcu_read_unlock();
	switch (filp->f_owner.pid_type) {
	case PIDTYPE_PID:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_TGID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}
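
/*
 * Illustrative userspace sketch (not part of this file): F_SETOWN_EX can
 * target a single thread (F_OWNER_TID, mapped to PIDTYPE_PID above),
 * which plain F_SETOWN cannot express.  Assumes gettid() is available
 * (glibc >= 2.30, or a syscall(2) wrapper).
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int own_fd_on_this_thread(int fd)
{
	struct f_owner_ex owner = {
		.type = F_OWNER_TID,	/* becomes PIDTYPE_PID above */
		.pid  = gettid(),
	};

	return fcntl(fd, F_SETOWN_EX, &owner);
}
#endif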

#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2];
	int err;

	read_lock(&filp->f_owner.lock);
	src[0] = from_kuid(user_ns, filp->f_owner.uid);
	src[1] = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err  = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

static bool rw_hint_valid(enum rw_hint hint)
{
	switch (hint) {
	case RWH_WRITE_LIFE_NOT_SET:
	case RWH_WRITE_LIFE_NONE:
	case RWH_WRITE_LIFE_SHORT:
	case RWH_WRITE_LIFE_MEDIUM:
	case RWH_WRITE_LIFE_LONG:
	case RWH_WRITE_LIFE_EXTREME:
		return true;
	default:
		return false;
	}
}

static long fcntl_rw_hint(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	enum rw_hint hint;
	u64 h;

	switch (cmd) {
	case F_GET_FILE_RW_HINT:
		h = file_write_hint(file);
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_FILE_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		spin_lock(&file->f_lock);
		file->f_write_hint = hint;
		spin_unlock(&file->f_lock);
		return 0;
	case F_GET_RW_HINT:
		h = inode->i_write_hint;
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		inode_lock(inode);
		inode->i_write_hint = hint;
		inode_unlock(inode);
		return 0;
	default:
		return -EINVAL;
	}
}
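
/*
 * Illustrative userspace sketch (not part of this file): per-inode write
 * lifetime hints as handled by fcntl_rw_hint().  The argument is passed
 * by pointer as a u64, matching the copy_from_user() above.  Assumes the
 * RWH_* constants are visible; on older glibc they come from
 * <linux/fcntl.h>.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>

static int mark_short_lived(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	/* stored in inode->i_write_hint; F_SET_FILE_RW_HINT would set
	 * the per-open-file hint instead */
	return fcntl(fd, F_SET_RW_HINT, &hint);
}
#endif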

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	void __user *argp = (void __user *)arg;
	struct flock flock;
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		err = f_dupfd(arg, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(arg, filp, O_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
#endif
		fallthrough;
	case F_SETLK:
	case F_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = memfd_fcntl(filp, cmd, arg);
		break;
	case F_GET_RW_HINT:
	case F_SET_RW_HINT:
	case F_GET_FILE_RW_HINT:
	case F_SET_FILE_RW_HINT:
		err = fcntl_rw_hint(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}
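
/*
 * Illustrative userspace sketch (not part of this file): an open file
 * description (OFD) lock, one of the F_OFD_* commands dispatched above.
 * Unlike F_SETLK, the lock belongs to the open file description rather
 * than the process, so it is not dropped when an unrelated fd for the
 * same file is closed.  l_pid must be 0 for OFD commands.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>

static int lock_whole_file(int fd)
{
	struct flock fl;

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;	/* l_start = l_len = 0: whole file */

	return fcntl(fd, F_OFD_SETLK, &fl);	/* fcntl64() on 32-bit */
}
#endif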

static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}
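
/*
 * Illustrative userspace sketch (not part of this file): O_PATH file
 * descriptors (FMODE_PATH below) accept only the five commands that
 * check_fcntl_cmd() allows; anything else fails with EBADF.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <errno.h>

static void o_path_demo(void)
{
	int fd = open("/etc", O_PATH | O_CLOEXEC);

	fcntl(fd, F_GETFD);			/* allowed */
	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1 && errno == EBADF)
		;				/* rejected: FMODE_PATH */
}
#endif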

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, f.file);

out1:
	fdput(f);
out:
	return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(f.file, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif

#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
#define copy_flock_fields(dst, src)		\
	(dst)->l_type = (src)->l_type;		\
	(dst)->l_whence = (src)->l_whence;	\
	(dst)->l_start = (src)->l_start;	\
	(dst)->l_len = (src)->l_len;		\
	(dst)->l_pid = (src)->l_pid;

static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	memset(&fl, 0, sizeof(struct compat_flock));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
		return -EFAULT;
	return 0;
}

static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

	memset(&fl, 0, sizeof(struct compat_flock64));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
		return -EFAULT;
	return 0;
}
#undef copy_flock_fields

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	}

	return cmd;
}

/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see the part of the conflicting lock
 * that might make sense to it anyway.
 */
static int fixup_compat_flock(struct flock *flock)
{
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}

static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
			      compat_ulong_t arg)
{
	struct fd f = fdget_raw(fd);
	struct flock flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out_put;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (!err)
			err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (!err)
			err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out_put:
	fdput(f);
	return err;
}

COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,	/* POLL_MSG */
	EPOLLERR,				/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
	EPOLLHUP | EPOLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum locklessly in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		default: {
			kernel_siginfo_t si;

			/*
			 * Queue a rt signal with the appropriate fd as its
			 * value.  We use SI_SIGIO as the source, not
			 * SI_KERNEL, since kernel signals always get
			 * delivered even if we can't queue.  Failure to
			 * queue in this case _should_ be reported; we fall
			 * back to SIGIO in that case. --sct
			 */
			clear_siginfo(&si);
			si.si_signo = signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/*
			 * POSIX defines POLL_IN and friends to be signal
			 * specific si_codes for SIGPOLL.  Linux extended
			 * these si_codes to other signals in a way that is
			 * ambiguous if other signals also have signal
			 * specific si_codes.  In that case use SI_SIGIO instead
			 * to remove the ambiguity.
			 */
			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
				si.si_code = SI_SIGIO;

			/*
			 * Make sure we are called with one of the POLL_*
			 * reasons, otherwise we could leak kernel stack into
			 * userspace.
			 */
			BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
			si.si_fd    = fd;
			if (!do_send_sig_info(signum, &si, p, type))
				break;
		}
			fallthrough;	/* fall back on the old plain SIGIO signal */
		case 0:
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	unsigned long flags;
	struct pid *pid;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, enum pid_type type)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	unsigned long flags;
	int ret = 0;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
 out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
	return ret;
}
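
/*
 * Illustrative userspace sketch (not part of this file): send_sigurg()
 * is what delivers SIGURG when TCP out-of-band data arrives.  The
 * socket's owner must be set first, or send_sigurg() finds no pid and
 * nothing is delivered.
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void watch_oob(int sock, void (*handler)(int))
{
	signal(SIGURG, handler);
	/* populates fown->pid, consulted by send_sigurg() above */
	fcntl(sock, F_SETOWN, getpid());
}
#endif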

static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(fasync_cache,
			container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		write_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		write_unlock_irq(&fa->fa_lock);
		goto out;
	}

	rwlock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if it did nothing but update an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (updated) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);
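
/*
 * Illustrative driver-side sketch (hypothetical device, not part of this
 * file): the usual pattern for wiring fasync_helper() into a character
 * device's ->fasync() file operation.
 */
#if 0
static struct fasync_struct *demo_async_queue;

static int demo_fasync(int fd, struct file *filp, int on)
{
	/* adds/removes filp on demo_async_queue and maintains FASYNC */
	return fasync_helper(fd, filp, on, &demo_async_queue);
}

static const struct file_operations demo_fops = {
	.owner	= THIS_MODULE,
	.fasync	= demo_fasync,
};
#endif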

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in fasync_struct!\n");
			return;
		}
		read_lock(&fa->fa_lock);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/*
			 * Don't send SIGURG to processes which have not set a
			 * queued signum: SIGURG has its own default signalling
			 * mechanism.
			 */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		read_unlock(&fa->fa_lock);
		fa = rcu_dereference(fa->fa_next);
	}
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/*
	 * First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);
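
/*
 * Illustrative driver-side sketch (hypothetical device, continuing the
 * demo_async_queue example above): when new data arrives, the driver
 * signals every registered owner.  kill_fasync() only takes
 * rcu_read_lock(), so it is safe from interrupt context.
 */
#if 0
static void demo_data_ready(void)
{
	/* raises SIGIO (or the F_SETSIG signal) with POLL_IN as reason */
	kill_fasync(&demo_async_queue, SIGIO, POLL_IN);
}
#endif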

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fcntl_init)
1056