/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void fastcall set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static inline int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}
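
/*
 * Illustrative userspace sketch (an exposition aid, not part of this
 * file): the close-on-exec bit managed by the two helpers above is what
 * fcntl(2)'s F_GETFD/F_SETFD expose:
 *
 *	int flags = fcntl(fd, F_GETFD);
 *	if (flags >= 0)
 *		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 */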

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  Must be called with the
 * file_lock held for write.
 */

static int locate_fd(struct files_struct *files,
			struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..fdt->next_fd
	 */
	start = orig_start;
	if (start < fdt->next_fd)
		start = fdt->next_fd;

	newfd = start;
	if (start < fdt->max_fdset) {
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
					   fdt->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/*
	 * We reacquired files_lock, so we are safe as long as
	 * we reacquire the fdtable pointer and use it while holding
	 * the lock; no one can free it during that time.
	 */
	fdt = files_fdtable(files);
	if (start <= fdt->next_fd)
		fdt->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}

static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int fd;

	spin_lock(&files->file_lock);
	fd = locate_fd(files, file, start);
	if (fd >= 0) {
		/* locate_fd() may have expanded fdtable, load the ptr */
		fdt = files_fdtable(files);
		FD_SET(fd, fdt->open_fds);
		FD_CLR(fd, fdt->close_on_exec);
		spin_unlock(&files->file_lock);
		fd_install(fd, file);
	} else {
		spin_unlock(&files->file_lock);
		fput(file);
	}

	return fd;
}

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file *file, *tofree;
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done. --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file *file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}
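
/*
 * Illustrative userspace sketch (not part of this file; "log.txt" is an
 * arbitrary example name): sys_dup2() above backs the classic
 * redirection idiom, with the tofree handling closing whatever
 * previously occupied newfd:
 *
 *	int logfd = open("log.txt", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *	if (logfd >= 0) {
 *		dup2(logfd, STDOUT_FILENO);	-- stdout now writes to log.txt
 *		close(logfd);
 *	}
 */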

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int error = 0;

	/* O_APPEND cannot be cleared if the file is marked as append-only */
	if (!(arg & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
out:
	unlock_kernel();
	return error;
}

static void f_modown(struct file *filp, unsigned long pid,
			uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		filp->f_owner.pid = pid;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int f_setown(struct file *filp, unsigned long arg, int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, arg, current->uid, current->euid, force);
	return 0;
}

EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, 0, 0, 0, 1);
}
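
/*
 * Illustrative userspace sketch (not part of this file): f_setown()
 * above is reached through F_SETOWN; combined with F_SETSIG (handled in
 * do_fcntl() below) and O_ASYNC it sets up signal-driven I/O.  F_SETSIG
 * is Linux-specific and needs _GNU_SOURCE in userspace:
 *
 *	fcntl(fd, F_SETOWN, getpid());		-- deliver signals to us
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);	-- queued rt signal, not plain SIGIO
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 */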

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
			struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		get_file(filp);
		err = dupfd(filp, arg);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = filp->f_owner.pid;
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif
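
/*
 * Illustrative userspace sketch (not part of this file): the F_SETFL
 * case above funnels into setfl(); this is what the common non-blocking
 * toggle exercises.  Note that only the SETFL_MASK bits can be changed
 * this way:
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags >= 0)
 *		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */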

/* Table to convert sigio signal codes into poll band bitmaps */

static long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
	default:
		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		si.si_signo = fown->signum;
		si.si_errno = 0;
		si.si_code  = reason;
		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		if ((reason & __SI_MASK) != __SI_POLL)
			BUG();
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band = ~0L;
		else
			si.si_band = band_table[reason - POLL_IN];
		si.si_fd    = fd;
		if (!send_group_sig_info(fown->signum, &si, p))
			break;
	/* fall-through: fall back on the old plain SIGIO signal */
	case 0:
		send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	int pid;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigio_to_task(p, fown, fd, band);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigio_to_task(p, fown, fd, band);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	int pid, ret = 0;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigurg_to_task(p, fown);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigurg_to_task(p, fown);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static kmem_cache_t *fasync_cache;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns negative on error, 0 if it made
 * no changes and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);
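
/*
 * Illustrative driver-side sketch (not part of this file; the mydev
 * names are hypothetical): a character driver typically wires
 * fasync_helper() into its fasync file operation,
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 *
 * and points .fasync at mydev_fasync in its file_operations.
 */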

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
	return 0;
}

module_init(fasync_init)
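
/*
 * Illustrative driver-side sketch (not part of this file; continues the
 * hypothetical mydev example above): when new data arrives, the driver
 * notifies every registered listener, which ends up in send_sigio() via
 * __kill_fasync():
 *
 *	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 */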