// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/* Pipe packetized mode is controlled by O_DIRECT flag */
	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	spin_unlock(&filp->f_lock);

out:
	return error;
}
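
/*
 * Illustrative userspace sketch (not part of this file): how the setfl()
 * path above is typically reached.  F_GETFL returns the current f_flags;
 * F_SETFL can only change the status flags in SETFL_MASK (plus O_ASYNC,
 * which is routed through ->fasync() as seen above), so access mode and
 * creation flags passed here are silently ignored.
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags < 0)
 *		...;                        (error handling omitted)
 *	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
 *		...;                        (ends up in setfl() above)
 */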
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
	}
	write_unlock_irq(&filp->f_owner.lock);
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	security_file_set_fowner(filp);
	f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid = NULL;
	int who = arg, ret = 0;

	type = PIDTYPE_TGID;
	if (who < 0) {
		/* avoid overflow below */
		if (who == INT_MIN)
			return -EINVAL;

		type = PIDTYPE_PGID;
		who = -who;
	}

	rcu_read_lock();
	if (who) {
		pid = find_vpid(who);
		if (!pid)
			ret = -ESRCH;
	}

	if (!ret)
		__f_setown(filp, pid, type, force);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_TGID, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid = 0;

	read_lock(&filp->f_owner.lock);
	rcu_read_lock();
	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
		pid = pid_vnr(filp->f_owner.pid);
		if (filp->f_owner.pid_type == PIDTYPE_PGID)
			pid = -pid;
	}
	rcu_read_unlock();
	read_unlock(&filp->f_owner.lock);
	return pid;
}

static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_TGID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner = {};
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	rcu_read_lock();
	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
		owner.pid = pid_vnr(filp->f_owner.pid);
	rcu_read_unlock();

	switch (filp->f_owner.pid_type) {
	case PIDTYPE_PID:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_TGID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}
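
/*
 * Illustrative userspace sketch (not part of this file): directing
 * SIGIO/SIGURG to a single thread via the _EX interface handled by
 * f_setown_ex() above.  F_OWNER_TID maps to PIDTYPE_PID, F_OWNER_PID to
 * PIDTYPE_TGID and F_OWNER_PGRP to PIDTYPE_PGID, mirroring the switch
 * statements above.
 *
 *	struct f_owner_ex owner = {
 *		.type = F_OWNER_TID,
 *		.pid  = gettid(),           (any valid thread id works)
 *	};
 *	if (fcntl(fd, F_SETOWN_EX, &owner) < 0)
 *		...;                        (error handling omitted)
 */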
#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2];
	int err;

	read_lock(&filp->f_owner.lock);
	src[0] = from_kuid(user_ns, filp->f_owner.uid);
	src[1] = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err  = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

static bool rw_hint_valid(enum rw_hint hint)
{
	switch (hint) {
	case RWH_WRITE_LIFE_NOT_SET:
	case RWH_WRITE_LIFE_NONE:
	case RWH_WRITE_LIFE_SHORT:
	case RWH_WRITE_LIFE_MEDIUM:
	case RWH_WRITE_LIFE_LONG:
	case RWH_WRITE_LIFE_EXTREME:
		return true;
	default:
		return false;
	}
}

static long fcntl_rw_hint(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	enum rw_hint hint;
	u64 h;

	switch (cmd) {
	case F_GET_FILE_RW_HINT:
		h = file_write_hint(file);
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_FILE_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		spin_lock(&file->f_lock);
		file->f_write_hint = hint;
		spin_unlock(&file->f_lock);
		return 0;
	case F_GET_RW_HINT:
		h = inode->i_write_hint;
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		inode_lock(inode);
		inode->i_write_hint = hint;
		inode_unlock(inode);
		return 0;
	default:
		return -EINVAL;
	}
}
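
/*
 * Illustrative userspace sketch (not part of this file): setting a
 * per-inode write-life hint, handled by fcntl_rw_hint() above.  Note
 * that the argument is a pointer to a full u64, not a plain int.
 *
 *	uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
 *		...;                        (error handling omitted)
 */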
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	void __user *argp = (void __user *)arg;
	struct flock flock;
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		err = f_dupfd(arg, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(arg, filp, O_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
#endif
		fallthrough;
	case F_SETLK:
	case F_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = memfd_fcntl(filp, cmd, arg);
		break;
	case F_GET_RW_HINT:
	case F_SET_RW_HINT:
	case F_GET_FILE_RW_HINT:
	case F_SET_FILE_RW_HINT:
		err = fcntl_rw_hint(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}

static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, f.file);

out1:
	fdput(f);
out:
	return err;
}
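
/*
 * Illustrative userspace sketch (not part of this file): taking an
 * open-file-description lock via the F_OFD_SETLK case dispatched by
 * do_fcntl() above.  l_pid must be zero for OFD locks; the lock is
 * owned by the open file description, not by the calling process.
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,              (0 means "to EOF")
 *		.l_pid    = 0,              (required for OFD locks)
 *	};
 *	if (fcntl(fd, F_OFD_SETLK, &fl) < 0)
 *		...;                        (error handling omitted)
 */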
#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(f.file, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif

#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
#define copy_flock_fields(dst, src)		\
	(dst)->l_type = (src)->l_type;		\
	(dst)->l_whence = (src)->l_whence;	\
	(dst)->l_start = (src)->l_start;	\
	(dst)->l_len = (src)->l_len;		\
	(dst)->l_pid = (src)->l_pid;

static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	memset(&fl, 0, sizeof(struct compat_flock));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
		return -EFAULT;
	return 0;
}

static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

	memset(&fl, 0, sizeof(struct compat_flock64));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
		return -EFAULT;
	return 0;
}
#undef copy_flock_fields

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	}

	return cmd;
}
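
/*
 * Illustrative note (not part of this file): a 32-bit caller's plain
 *
 *	struct flock fl = { .l_type = F_RDLCK, .l_whence = SEEK_SET };
 *	fcntl(fd, F_GETLK, &fl);
 *
 * arrives on the compat path as a struct compat_flock.  The helpers
 * above widen it field by field into the native struct flock, the
 * command is mapped through convert_fcntl_cmd() (an identity for
 * F_GETLK, F_GETLK64 -> F_GETLK for the 64-bit variants), and the
 * result is narrowed back with put_compat_flock().
 */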
/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see that part of the conflicting lock
 * that might make sense to it anyway.
 */
static int fixup_compat_flock(struct flock *flock)
{
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}

static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
			      compat_ulong_t arg)
{
	struct fd f = fdget_raw(fd);
	struct flock flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out_put;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (!err)
			err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (!err)
			err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out_put:
	fdput(f);
	return err;
}

COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,	/* POLL_MSG */
	EPOLLERR,				/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
	EPOLLHUP | EPOLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}
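
/*
 * Illustrative userspace sketch (not part of this file): the usual
 * three-step setup that arms the send_sigio() machinery below.
 *
 *	fcntl(fd, F_SETOWN, getpid());             (who gets the signal)
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);         (optional: a queued rt
 *	                                            signal with siginfo,
 *	                                            see send_sigio_to_task())
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 */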
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
	default: {
		kernel_siginfo_t si;

		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		clear_siginfo(&si);
		si.si_signo = signum;
		si.si_errno = 0;
		si.si_code  = reason;
		/*
		 * POSIX defines POLL_IN and friends to be signal
		 * specific si_codes for SIG_POLL.  Linux extended
		 * these si_codes to other signals in a way that is
		 * ambiguous if other signals also have signal
		 * specific si_codes.  In that case use SI_SIGIO instead
		 * to remove the ambiguity.
		 */
		if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
			si.si_code = SI_SIGIO;

		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band = ~0L;
		else
			si.si_band = mangle_poll(band_table[reason - POLL_IN]);
		si.si_fd = fd;
		if (!do_send_sig_info(signum, &si, p, type))
			break;
	}
		fallthrough;	/* fall back on the old plain SIGIO signal */
	case 0:
		do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	unsigned long flags;
	struct pid *pid;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, enum pid_type type)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	unsigned long flags;
	int ret = 0;

	read_lock_irqsave(&fown->lock, flags);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
out_unlock_fown:
	read_unlock_irqrestore(&fown->lock, flags);
	return ret;
}
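
/*
 * Illustrative userspace sketch (not part of this file): a handler for
 * the rt-signal case of send_sigio_to_task() above.  With F_SETSIG set
 * to a queued signal, si_fd and si_band carry the file descriptor and
 * the poll band derived from band_table[].  handle_readable() is a
 * hypothetical helper.
 *
 *	static void on_io(int sig, siginfo_t *si, void *ctx)
 *	{
 *		if (si->si_band & POLLIN)
 *			handle_readable(si->si_fd);
 *	}
 *
 *	struct sigaction sa = { .sa_sigaction = on_io,
 *				.sa_flags = SA_SIGINFO };
 *	sigaction(SIGRTMIN + 1, &sa, NULL);
 */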
static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(fasync_cache,
			container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		write_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		write_unlock_irq(&fa->fa_lock);
		goto out;
	}

	rwlock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (updated) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}
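
/*
 * Illustrative kernel-side sketch (not part of this file): callers that
 * must not sleep at insertion time split the GFP_KERNEL allocation from
 * the list update using the two helpers above, roughly:
 *
 *	struct fasync_struct *new = fasync_alloc();   (may sleep)
 *	if (!new)
 *		return -ENOMEM;
 *	spin_lock(&my_lock);                          (hypothetical lock)
 *	if (fasync_insert_entry(fd, filp, &my_fasync_list, new))
 *		fasync_free(new);                     (entry already existed)
 *	spin_unlock(&my_lock);
 *
 * This is the pattern that lets the lease code insert entries while
 * holding its own locks; fasync_helper() below wraps the common case.
 */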
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		read_lock(&fa->fa_lock);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		read_unlock(&fa->fa_lock);
		fa = rcu_dereference(fa->fa_next);
	}
}

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	fasync_cache = kmem_cache_create("fasync_cache",
					 sizeof(struct fasync_struct), 0,
					 SLAB_PANIC, NULL);
	return 0;
}

module_init(fcntl_init)
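
/*
 * Illustrative driver-side sketch (not part of this file): the classic
 * character-driver pairing of fasync_helper() and kill_fasync().  Names
 * prefixed my_ are hypothetical.
 *
 *	static struct fasync_struct *my_async_queue;
 *
 *	static int my_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &my_async_queue);
 *	}
 *
 * Later, when new data arrives (e.g. in an interrupt handler):
 *
 *	kill_fasync(&my_async_queue, SIGIO, POLL_IN);
 *
 * The driver's ->release() must also call its ->fasync() with on == 0
 * so the entry is removed before the file goes away.
 */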