/*
 * linux/fs/fcntl.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/shmem_fs.h>
#include <linux/compat.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/* Pipe packetized mode is controlled by O_DIRECT flag */
	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	spin_unlock(&filp->f_lock);

out:
	return error;
}

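/*
 * Example (userspace sketch, not part of this file): the SETFL_MASK
 * filtering above is what makes the usual read-modify-write idiom for
 * F_SETFL safe -- bits outside the mask are silently preserved, so a
 * caller can toggle O_NONBLOCK without disturbing anything else:
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
 *		perror("F_SETFL");
 */
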
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
	}
	write_unlock_irq(&filp->f_owner.lock);
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	security_file_set_fowner(filp);
	f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);

void f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_vpid(who);
	__f_setown(filp, pid, type, force);
	rcu_read_unlock();
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}

static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_MAX;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	owner.pid = pid_vnr(filp->f_owner.pid);
	switch (filp->f_owner.pid_type) {
	case PIDTYPE_MAX:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_PID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}

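/*
 * Example (userspace sketch): unlike plain F_SETOWN, where a negative
 * argument means "process group", F_SETOWN_EX carries the owner kind
 * explicitly and can target a single thread (PIDTYPE_MAX above).
 * gettid(2) here assumes a libc that exposes it:
 *
 *	struct f_owner_ex owner = {
 *		.type = F_OWNER_TID,
 *		.pid  = gettid(),	/* this thread only */
 *	};
 *	if (fcntl(fd, F_SETOWN_EX, &owner) < 0)
 *		perror("F_SETOWN_EX");
 */
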
#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2];
	int err;

	read_lock(&filp->f_owner.lock);
	src[0] = from_kuid(user_ns, filp->f_owner.uid);
	src[1] = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

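/*
 * Example (userspace sketch): the classic setup for signal-driven I/O,
 * exercising the F_SETOWN, F_SETSIG and F_SETFL cases handled in
 * do_fcntl() below.  Choosing a realtime signal makes the kernel queue
 * it with si_fd/si_band filled in (see send_sigio_to_task() further
 * down):
 *
 *	fcntl(fd, F_SETOWN, getpid());
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 */
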
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		err = f_dupfd(arg, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(arg, filp, O_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
#endif
		/* Fallthrough */
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		f_setown(filp, arg, 1);
		err = 0;
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = shmem_fcntl(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}

static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, f.file);

out1:
	fdput(f);
out:
	return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = fcntl_setlk64(fd, f.file, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif

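/*
 * Example (userspace sketch): the F_OFD_* commands above take a struct
 * flock whose l_pid must be zero; the resulting lock belongs to the
 * open file description rather than the process, so it is shared with
 * a fork()ed child holding the same description and only dropped on
 * the last close:
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,		/* whole file */
 *		.l_pid    = 0,		/* required for OFD locks */
 *	};
 *	if (fcntl(fd, F_OFD_SETLK, &fl) < 0)
 *		perror("F_OFD_SETLK");
 */
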
#ifdef CONFIG_COMPAT
static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
{
	if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
	    __get_user(kfl->l_type, &ufl->l_type) ||
	    __get_user(kfl->l_whence, &ufl->l_whence) ||
	    __get_user(kfl->l_start, &ufl->l_start) ||
	    __get_user(kfl->l_len, &ufl->l_len) ||
	    __get_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;
	return 0;
}

static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
{
	if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
	    __put_user(kfl->l_type, &ufl->l_type) ||
	    __put_user(kfl->l_whence, &ufl->l_whence) ||
	    __put_user(kfl->l_start, &ufl->l_start) ||
	    __put_user(kfl->l_len, &ufl->l_len) ||
	    __put_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;
	return 0;
}

#ifndef HAVE_ARCH_GET_COMPAT_FLOCK64
static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
{
	if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
	    __get_user(kfl->l_type, &ufl->l_type) ||
	    __get_user(kfl->l_whence, &ufl->l_whence) ||
	    __get_user(kfl->l_start, &ufl->l_start) ||
	    __get_user(kfl->l_len, &ufl->l_len) ||
	    __get_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;
	return 0;
}
#endif

#ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64
static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
{
	if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
	    __put_user(kfl->l_type, &ufl->l_type) ||
	    __put_user(kfl->l_whence, &ufl->l_whence) ||
	    __put_user(kfl->l_start, &ufl->l_start) ||
	    __put_user(kfl->l_len, &ufl->l_len) ||
	    __put_user(kfl->l_pid, &ufl->l_pid))
		return -EFAULT;
	return 0;
}
#endif

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	}

	return cmd;
}

COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	mm_segment_t old_fs;
	struct flock f;
	long ret;
	unsigned int conv_cmd;

	switch (cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		ret = get_compat_flock(&f, compat_ptr(arg));
		if (ret != 0)
			break;
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		ret = sys_fcntl(fd, cmd, (unsigned long)&f);
		set_fs(old_fs);
		if (cmd == F_GETLK && ret == 0) {
			/* GETLK was successful and we need to return the data...
			 * but it needs to fit in the compat structure.
			 * l_start shouldn't be too big, unless the original
			 * start + end is greater than COMPAT_OFF_T_MAX, in which
			 * case the app was asking for trouble, so we return
			 * -EOVERFLOW in that case.
			 * l_len could be too big, in which case we just truncate it,
			 * and only allow the app to see that part of the conflicting
			 * lock that might make sense to it anyway
			 */

			if (f.l_start > COMPAT_OFF_T_MAX)
				ret = -EOVERFLOW;
			if (f.l_len > COMPAT_OFF_T_MAX)
				f.l_len = COMPAT_OFF_T_MAX;
			if (ret == 0)
				ret = put_compat_flock(&f, compat_ptr(arg));
		}
		break;

	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		ret = get_compat_flock64(&f, compat_ptr(arg));
		if (ret != 0)
			break;
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		conv_cmd = convert_fcntl_cmd(cmd);
		ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
		set_fs(old_fs);
		if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
			/* need to return lock information - see above for commentary */
			if (f.l_start > COMPAT_LOFF_T_MAX)
				ret = -EOVERFLOW;
			if (f.l_len > COMPAT_LOFF_T_MAX)
				f.l_len = COMPAT_LOFF_T_MAX;
			if (ret == 0)
				ret = put_compat_flock64(&f, compat_ptr(arg));
		}
		break;

	default:
		ret = sys_fcntl(fd, cmd, arg);
		break;
	}
	return ret;
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return compat_sys_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

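/*
 * Example (userspace sketch): a handler installed with SA_SIGINFO for a
 * signal chosen via F_SETSIG sees the band_table translation above in
 * si_band (handle_input() is a hypothetical callback):
 *
 *	static void on_io(int sig, siginfo_t *si, void *uctx)
 *	{
 *		if (si->si_band & (POLLIN | POLLRDNORM))
 *			handle_input(si->si_fd);
 *	}
 */
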
static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, int group)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = ACCESS_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		siginfo_t si;
	default:
		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		si.si_signo = signum;
		si.si_errno = 0;
		si.si_code  = reason;
		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		BUG_ON((reason & __SI_MASK) != __SI_POLL);
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band = ~0L;
		else
			si.si_band = band_table[reason - POLL_IN];
		si.si_fd = fd;
		if (!do_send_sig_info(signum, &si, p, group))
			break;
		/* fall-through: fall back on the old plain SIGIO signal */
	case 0:
		do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int group = 1;

	read_lock(&fown->lock);

	type = fown->pid_type;
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band, group);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, int group)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int group = 1;
	int ret = 0;

	read_lock(&fown->lock);

	type = fown->pid_type;
	if (type == PIDTYPE_MAX) {
		group = 0;
		type = PIDTYPE_PID;
	}

	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown, group);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

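/*
 * Example (userspace sketch): send_sigurg() above is how TCP delivers
 * SIGURG when out-of-band data arrives.  A receiver opts in by claiming
 * ownership of the socket, then fetches the OOB byte once signalled:
 *
 *	fcntl(sock, F_SETOWN, getpid());
 *	...
 *	char c;
 *	recv(sock, &c, 1, MSG_OOB);
 */
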
static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(fasync_cache,
			container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		spin_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		spin_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		spin_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		spin_unlock_irq(&fa->fa_lock);
		goto out;
	}

	spin_lock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (update) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		unsigned long flags;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		spin_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		spin_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fcntl_init)

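/*
 * Example (kernel-side sketch, hypothetical "foo" driver): the canonical
 * pairing of fasync_helper() and kill_fasync() above -- the driver's
 * ->fasync method maintains the list, and the data-arrival path (often
 * an interrupt handler) kicks it:
 *
 *	static struct fasync_struct *foo_async_queue;
 *
 *	static int foo_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &foo_async_queue);
 *	}
 *
 *	static void foo_data_ready(void)
 *	{
 *		kill_fasync(&foo_async_queue, SIGIO, POLL_IN);
 *	}
 */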