1 /* 2 * linux/fs/fcntl.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/syscalls.h> 8 #include <linux/init.h> 9 #include <linux/mm.h> 10 #include <linux/fs.h> 11 #include <linux/file.h> 12 #include <linux/fdtable.h> 13 #include <linux/capability.h> 14 #include <linux/dnotify.h> 15 #include <linux/slab.h> 16 #include <linux/module.h> 17 #include <linux/security.h> 18 #include <linux/ptrace.h> 19 #include <linux/signal.h> 20 #include <linux/rcupdate.h> 21 #include <linux/pid_namespace.h> 22 23 #include <asm/poll.h> 24 #include <asm/siginfo.h> 25 #include <asm/uaccess.h> 26 27 void set_close_on_exec(unsigned int fd, int flag) 28 { 29 struct files_struct *files = current->files; 30 struct fdtable *fdt; 31 spin_lock(&files->file_lock); 32 fdt = files_fdtable(files); 33 if (flag) 34 FD_SET(fd, fdt->close_on_exec); 35 else 36 FD_CLR(fd, fdt->close_on_exec); 37 spin_unlock(&files->file_lock); 38 } 39 40 static int get_close_on_exec(unsigned int fd) 41 { 42 struct files_struct *files = current->files; 43 struct fdtable *fdt; 44 int res; 45 rcu_read_lock(); 46 fdt = files_fdtable(files); 47 res = FD_ISSET(fd, fdt->close_on_exec); 48 rcu_read_unlock(); 49 return res; 50 } 51 52 /* 53 * locate_fd finds a free file descriptor in the open_fds fdset, 54 * expanding the fd arrays if necessary. Must be called with the 55 * file_lock held for write. 56 */ 57 58 static int locate_fd(unsigned int orig_start, int cloexec) 59 { 60 struct files_struct *files = current->files; 61 unsigned int newfd; 62 unsigned int start; 63 int error; 64 struct fdtable *fdt; 65 66 spin_lock(&files->file_lock); 67 repeat: 68 fdt = files_fdtable(files); 69 /* 70 * Someone might have closed fd's in the range 71 * orig_start..fdt->next_fd 72 */ 73 start = orig_start; 74 if (start < files->next_fd) 75 start = files->next_fd; 76 77 newfd = start; 78 if (start < fdt->max_fds) 79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits, 80 fdt->max_fds, start); 81 82 error = expand_files(files, newfd); 83 if (error < 0) 84 goto out; 85 86 /* 87 * If we needed to expand the fs array we 88 * might have blocked - try again. 89 */ 90 if (error) 91 goto repeat; 92 93 if (start <= files->next_fd) 94 files->next_fd = newfd + 1; 95 96 FD_SET(newfd, fdt->open_fds); 97 if (cloexec) 98 FD_SET(newfd, fdt->close_on_exec); 99 else 100 FD_CLR(newfd, fdt->close_on_exec); 101 error = newfd; 102 103 out: 104 spin_unlock(&files->file_lock); 105 return error; 106 } 107 108 static int dupfd(struct file *file, unsigned int start, int cloexec) 109 { 110 int fd = locate_fd(start, cloexec); 111 if (fd >= 0) 112 fd_install(fd, file); 113 else 114 fput(file); 115 116 return fd; 117 } 118 119 asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 120 { 121 int err = -EBADF; 122 struct file * file, *tofree; 123 struct files_struct * files = current->files; 124 struct fdtable *fdt; 125 126 if ((flags & ~O_CLOEXEC) != 0) 127 return -EINVAL; 128 129 if (unlikely(oldfd == newfd)) 130 return -EINVAL; 131 132 spin_lock(&files->file_lock); 133 if (!(file = fcheck(oldfd))) 134 goto out_unlock; 135 get_file(file); /* We are now finished with oldfd */ 136 137 err = expand_files(files, newfd); 138 if (unlikely(err < 0)) { 139 if (err == -EMFILE) 140 err = -EBADF; 141 goto out_fput; 142 } 143 144 /* To avoid races with open() and dup(), we will mark the fd as 145 * in-use in the open-file bitmap throughout the entire dup2() 146 * process. This is quite safe: do_close() uses the fd array 147 * entry, not the bitmap, to decide what work needs to be 148 * done. --sct */ 149 /* Doesn't work. open() might be there first. --AV */ 150 151 /* Yes. It's a race. In user space. Nothing sane to do */ 152 err = -EBUSY; 153 fdt = files_fdtable(files); 154 tofree = fdt->fd[newfd]; 155 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 156 goto out_fput; 157 158 rcu_assign_pointer(fdt->fd[newfd], file); 159 FD_SET(newfd, fdt->open_fds); 160 if (flags & O_CLOEXEC) 161 FD_SET(newfd, fdt->close_on_exec); 162 else 163 FD_CLR(newfd, fdt->close_on_exec); 164 spin_unlock(&files->file_lock); 165 166 if (tofree) 167 filp_close(tofree, files); 168 err = newfd; 169 out: 170 return err; 171 out_unlock: 172 spin_unlock(&files->file_lock); 173 goto out; 174 175 out_fput: 176 spin_unlock(&files->file_lock); 177 fput(file); 178 goto out; 179 } 180 181 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 182 { 183 if (unlikely(newfd == oldfd)) { /* corner case */ 184 struct files_struct *files = current->files; 185 rcu_read_lock(); 186 if (!fcheck_files(files, oldfd)) 187 oldfd = -EBADF; 188 rcu_read_unlock(); 189 return oldfd; 190 } 191 return sys_dup3(oldfd, newfd, 0); 192 } 193 194 asmlinkage long sys_dup(unsigned int fildes) 195 { 196 int ret = -EBADF; 197 struct file * file = fget(fildes); 198 199 if (file) 200 ret = dupfd(file, 0, 0); 201 return ret; 202 } 203 204 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) 205 206 static int setfl(int fd, struct file * filp, unsigned long arg) 207 { 208 struct inode * inode = filp->f_path.dentry->d_inode; 209 int error = 0; 210 211 /* 212 * O_APPEND cannot be cleared if the file is marked as append-only 213 * and the file is open for write. 214 */ 215 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) 216 return -EPERM; 217 218 /* O_NOATIME can only be set by the owner or superuser */ 219 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) 220 if (!is_owner_or_cap(inode)) 221 return -EPERM; 222 223 /* required for strict SunOS emulation */ 224 if (O_NONBLOCK != O_NDELAY) 225 if (arg & O_NDELAY) 226 arg |= O_NONBLOCK; 227 228 if (arg & O_DIRECT) { 229 if (!filp->f_mapping || !filp->f_mapping->a_ops || 230 !filp->f_mapping->a_ops->direct_IO) 231 return -EINVAL; 232 } 233 234 if (filp->f_op && filp->f_op->check_flags) 235 error = filp->f_op->check_flags(arg); 236 if (error) 237 return error; 238 239 if ((arg ^ filp->f_flags) & FASYNC) { 240 if (filp->f_op && filp->f_op->fasync) { 241 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 242 if (error < 0) 243 goto out; 244 } 245 } 246 247 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 248 out: 249 return error; 250 } 251 252 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, 253 uid_t uid, uid_t euid, int force) 254 { 255 write_lock_irq(&filp->f_owner.lock); 256 if (force || !filp->f_owner.pid) { 257 put_pid(filp->f_owner.pid); 258 filp->f_owner.pid = get_pid(pid); 259 filp->f_owner.pid_type = type; 260 filp->f_owner.uid = uid; 261 filp->f_owner.euid = euid; 262 } 263 write_unlock_irq(&filp->f_owner.lock); 264 } 265 266 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, 267 int force) 268 { 269 int err; 270 271 err = security_file_set_fowner(filp); 272 if (err) 273 return err; 274 275 f_modown(filp, pid, type, current->uid, current->euid, force); 276 return 0; 277 } 278 EXPORT_SYMBOL(__f_setown); 279 280 int f_setown(struct file *filp, unsigned long arg, int force) 281 { 282 enum pid_type type; 283 struct pid *pid; 284 int who = arg; 285 int result; 286 type = PIDTYPE_PID; 287 if (who < 0) { 288 type = PIDTYPE_PGID; 289 who = -who; 290 } 291 rcu_read_lock(); 292 pid = find_vpid(who); 293 result = __f_setown(filp, pid, type, force); 294 rcu_read_unlock(); 295 return result; 296 } 297 EXPORT_SYMBOL(f_setown); 298 299 void f_delown(struct file *filp) 300 { 301 f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1); 302 } 303 304 pid_t f_getown(struct file *filp) 305 { 306 pid_t pid; 307 read_lock(&filp->f_owner.lock); 308 pid = pid_vnr(filp->f_owner.pid); 309 if (filp->f_owner.pid_type == PIDTYPE_PGID) 310 pid = -pid; 311 read_unlock(&filp->f_owner.lock); 312 return pid; 313 } 314 315 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 316 struct file *filp) 317 { 318 long err = -EINVAL; 319 320 switch (cmd) { 321 case F_DUPFD: 322 case F_DUPFD_CLOEXEC: 323 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 324 break; 325 get_file(filp); 326 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 327 break; 328 case F_GETFD: 329 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 330 break; 331 case F_SETFD: 332 err = 0; 333 set_close_on_exec(fd, arg & FD_CLOEXEC); 334 break; 335 case F_GETFL: 336 err = filp->f_flags; 337 break; 338 case F_SETFL: 339 err = setfl(fd, filp, arg); 340 break; 341 case F_GETLK: 342 err = fcntl_getlk(filp, (struct flock __user *) arg); 343 break; 344 case F_SETLK: 345 case F_SETLKW: 346 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); 347 break; 348 case F_GETOWN: 349 /* 350 * XXX If f_owner is a process group, the 351 * negative return value will get converted 352 * into an error. Oops. If we keep the 353 * current syscall conventions, the only way 354 * to fix this will be in libc. 355 */ 356 err = f_getown(filp); 357 force_successful_syscall_return(); 358 break; 359 case F_SETOWN: 360 err = f_setown(filp, arg, 1); 361 break; 362 case F_GETSIG: 363 err = filp->f_owner.signum; 364 break; 365 case F_SETSIG: 366 /* arg == 0 restores default behaviour. */ 367 if (!valid_signal(arg)) { 368 break; 369 } 370 err = 0; 371 filp->f_owner.signum = arg; 372 break; 373 case F_GETLEASE: 374 err = fcntl_getlease(filp); 375 break; 376 case F_SETLEASE: 377 err = fcntl_setlease(fd, filp, arg); 378 break; 379 case F_NOTIFY: 380 err = fcntl_dirnotify(fd, filp, arg); 381 break; 382 default: 383 break; 384 } 385 return err; 386 } 387 388 asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) 389 { 390 struct file *filp; 391 long err = -EBADF; 392 393 filp = fget(fd); 394 if (!filp) 395 goto out; 396 397 err = security_file_fcntl(filp, cmd, arg); 398 if (err) { 399 fput(filp); 400 return err; 401 } 402 403 err = do_fcntl(fd, cmd, arg, filp); 404 405 fput(filp); 406 out: 407 return err; 408 } 409 410 #if BITS_PER_LONG == 32 411 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) 412 { 413 struct file * filp; 414 long err; 415 416 err = -EBADF; 417 filp = fget(fd); 418 if (!filp) 419 goto out; 420 421 err = security_file_fcntl(filp, cmd, arg); 422 if (err) { 423 fput(filp); 424 return err; 425 } 426 err = -EBADF; 427 428 switch (cmd) { 429 case F_GETLK64: 430 err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 431 break; 432 case F_SETLK64: 433 case F_SETLKW64: 434 err = fcntl_setlk64(fd, filp, cmd, 435 (struct flock64 __user *) arg); 436 break; 437 default: 438 err = do_fcntl(fd, cmd, arg, filp); 439 break; 440 } 441 fput(filp); 442 out: 443 return err; 444 } 445 #endif 446 447 /* Table to convert sigio signal codes into poll band bitmaps */ 448 449 static const long band_table[NSIGPOLL] = { 450 POLLIN | POLLRDNORM, /* POLL_IN */ 451 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */ 452 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */ 453 POLLERR, /* POLL_ERR */ 454 POLLPRI | POLLRDBAND, /* POLL_PRI */ 455 POLLHUP | POLLERR /* POLL_HUP */ 456 }; 457 458 static inline int sigio_perm(struct task_struct *p, 459 struct fown_struct *fown, int sig) 460 { 461 return (((fown->euid == 0) || 462 (fown->euid == p->suid) || (fown->euid == p->uid) || 463 (fown->uid == p->suid) || (fown->uid == p->uid)) && 464 !security_file_send_sigiotask(p, fown, sig)); 465 } 466 467 static void send_sigio_to_task(struct task_struct *p, 468 struct fown_struct *fown, 469 int fd, 470 int reason) 471 { 472 if (!sigio_perm(p, fown, fown->signum)) 473 return; 474 475 switch (fown->signum) { 476 siginfo_t si; 477 default: 478 /* Queue a rt signal with the appropriate fd as its 479 value. We use SI_SIGIO as the source, not 480 SI_KERNEL, since kernel signals always get 481 delivered even if we can't queue. Failure to 482 queue in this case _should_ be reported; we fall 483 back to SIGIO in that case. --sct */ 484 si.si_signo = fown->signum; 485 si.si_errno = 0; 486 si.si_code = reason; 487 /* Make sure we are called with one of the POLL_* 488 reasons, otherwise we could leak kernel stack into 489 userspace. */ 490 BUG_ON((reason & __SI_MASK) != __SI_POLL); 491 if (reason - POLL_IN >= NSIGPOLL) 492 si.si_band = ~0L; 493 else 494 si.si_band = band_table[reason - POLL_IN]; 495 si.si_fd = fd; 496 if (!group_send_sig_info(fown->signum, &si, p)) 497 break; 498 /* fall-through: fall back on the old plain SIGIO signal */ 499 case 0: 500 group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); 501 } 502 } 503 504 void send_sigio(struct fown_struct *fown, int fd, int band) 505 { 506 struct task_struct *p; 507 enum pid_type type; 508 struct pid *pid; 509 510 read_lock(&fown->lock); 511 type = fown->pid_type; 512 pid = fown->pid; 513 if (!pid) 514 goto out_unlock_fown; 515 516 read_lock(&tasklist_lock); 517 do_each_pid_task(pid, type, p) { 518 send_sigio_to_task(p, fown, fd, band); 519 } while_each_pid_task(pid, type, p); 520 read_unlock(&tasklist_lock); 521 out_unlock_fown: 522 read_unlock(&fown->lock); 523 } 524 525 static void send_sigurg_to_task(struct task_struct *p, 526 struct fown_struct *fown) 527 { 528 if (sigio_perm(p, fown, SIGURG)) 529 group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); 530 } 531 532 int send_sigurg(struct fown_struct *fown) 533 { 534 struct task_struct *p; 535 enum pid_type type; 536 struct pid *pid; 537 int ret = 0; 538 539 read_lock(&fown->lock); 540 type = fown->pid_type; 541 pid = fown->pid; 542 if (!pid) 543 goto out_unlock_fown; 544 545 ret = 1; 546 547 read_lock(&tasklist_lock); 548 do_each_pid_task(pid, type, p) { 549 send_sigurg_to_task(p, fown); 550 } while_each_pid_task(pid, type, p); 551 read_unlock(&tasklist_lock); 552 out_unlock_fown: 553 read_unlock(&fown->lock); 554 return ret; 555 } 556 557 static DEFINE_RWLOCK(fasync_lock); 558 static struct kmem_cache *fasync_cache __read_mostly; 559 560 /* 561 * fasync_helper() is used by some character device drivers (mainly mice) 562 * to set up the fasync queue. It returns negative on error, 0 if it did 563 * no changes and positive if it added/deleted the entry. 564 */ 565 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 566 { 567 struct fasync_struct *fa, **fp; 568 struct fasync_struct *new = NULL; 569 int result = 0; 570 571 if (on) { 572 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 573 if (!new) 574 return -ENOMEM; 575 } 576 write_lock_irq(&fasync_lock); 577 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 578 if (fa->fa_file == filp) { 579 if(on) { 580 fa->fa_fd = fd; 581 kmem_cache_free(fasync_cache, new); 582 } else { 583 *fp = fa->fa_next; 584 kmem_cache_free(fasync_cache, fa); 585 result = 1; 586 } 587 goto out; 588 } 589 } 590 591 if (on) { 592 new->magic = FASYNC_MAGIC; 593 new->fa_file = filp; 594 new->fa_fd = fd; 595 new->fa_next = *fapp; 596 *fapp = new; 597 result = 1; 598 } 599 out: 600 write_unlock_irq(&fasync_lock); 601 return result; 602 } 603 604 EXPORT_SYMBOL(fasync_helper); 605 606 void __kill_fasync(struct fasync_struct *fa, int sig, int band) 607 { 608 while (fa) { 609 struct fown_struct * fown; 610 if (fa->magic != FASYNC_MAGIC) { 611 printk(KERN_ERR "kill_fasync: bad magic number in " 612 "fasync_struct!\n"); 613 return; 614 } 615 fown = &fa->fa_file->f_owner; 616 /* Don't send SIGURG to processes which have not set a 617 queued signum: SIGURG has its own default signalling 618 mechanism. */ 619 if (!(sig == SIGURG && fown->signum == 0)) 620 send_sigio(fown, fa->fa_fd, band); 621 fa = fa->fa_next; 622 } 623 } 624 625 EXPORT_SYMBOL(__kill_fasync); 626 627 void kill_fasync(struct fasync_struct **fp, int sig, int band) 628 { 629 /* First a quick test without locking: usually 630 * the list is empty. 631 */ 632 if (*fp) { 633 read_lock(&fasync_lock); 634 /* reread *fp after obtaining the lock */ 635 __kill_fasync(*fp, sig, band); 636 read_unlock(&fasync_lock); 637 } 638 } 639 EXPORT_SYMBOL(kill_fasync); 640 641 static int __init fasync_init(void) 642 { 643 fasync_cache = kmem_cache_create("fasync_cache", 644 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); 645 return 0; 646 } 647 648 module_init(fasync_init) 649