/*
 * linux/fs/file.c
 *
 * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
 *
 * Manage the dynamic fd arrays in the process files_struct.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/time.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

int sysctl_nr_open __read_mostly = 1024*1024;
int sysctl_nr_open_min = BITS_PER_LONG;
int sysctl_nr_open_max = 1024 * 1024; /* raised later */

static void *alloc_fdmem(size_t size)
{
	/*
	 * Very large allocations can stress page reclaim, so fall back to
	 * vmalloc() if the allocation size will be considered "large" by the VM.
	 */
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
		if (data != NULL)
			return data;
	}
	return vmalloc(size);
}

static void free_fdmem(void *ptr)
{
	is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
}

static void __free_fdtable(struct fdtable *fdt)
{
	free_fdmem(fdt->fd);
	free_fdmem(fdt->open_fds);
	kfree(fdt);
}

static void free_fdtable_rcu(struct rcu_head *rcu)
{
	__free_fdtable(container_of(rcu, struct fdtable, rcu));
}

/*
 * Expand the fdset in the files_struct.  Called with the files spinlock
 * held for write.
 */
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
{
	unsigned int cpy, set;

	BUG_ON(nfdt->max_fds < ofdt->max_fds);

	cpy = ofdt->max_fds * sizeof(struct file *);
	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
	memcpy(nfdt->fd, ofdt->fd, cpy);
	memset((char *)(nfdt->fd) + cpy, 0, set);

	cpy = ofdt->max_fds / BITS_PER_BYTE;
	set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
	memset((char *)(nfdt->open_fds) + cpy, 0, set);
	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
	memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
}

static struct fdtable * alloc_fdtable(unsigned int nr)
{
	struct fdtable *fdt;
	void *data;

	/*
	 * Figure out how many fds we actually want to support in this fdtable.
	 * Allocation steps are keyed to the size of the fdarray, since it
	 * grows far faster than any of the other dynamic data. We try to fit
	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
	 * and growing in powers of two from there on.
	 */
	nr /= (1024 / sizeof(struct file *));
	nr = roundup_pow_of_two(nr + 1);
	nr *= (1024 / sizeof(struct file *));
	/*
	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
	 * had been set lower between the check in expand_files() and here.  Deal
	 * with that in caller, it's cheaper that way.
	 *
	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
	 * bitmaps handling below becomes unpleasant, to put it mildly...
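	 *
	 * A worked example of the sizing above, assuming 8-byte struct file
	 * pointers (i.e. 128 slots per 1024-byte chunk): asking for fd 300
	 * gives 300 / 128 = 2, roundup_pow_of_two(2 + 1) = 4, and finally
	 * 4 * 128 = 512 slots, i.e. a 4096-byte fd array.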
	 */
	if (unlikely(nr > sysctl_nr_open))
		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;

	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
	if (!fdt)
		goto out;
	fdt->max_fds = nr;
	data = alloc_fdmem(nr * sizeof(struct file *));
	if (!data)
		goto out_fdt;
	fdt->fd = data;

	data = alloc_fdmem(max_t(size_t,
				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
	if (!data)
		goto out_arr;
	fdt->open_fds = data;
	data += nr / BITS_PER_BYTE;
	fdt->close_on_exec = data;

	return fdt;

out_arr:
	free_fdmem(fdt->fd);
out_fdt:
	kfree(fdt);
out:
	return NULL;
}

/*
 * Expand the file descriptor table.
 * This function will allocate a new fdtable and both fd array and fdset, of
 * the given size.
 * Return <0 error code on error; 1 on successful completion.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
static int expand_fdtable(struct files_struct *files, int nr)
	__releases(files->file_lock)
	__acquires(files->file_lock)
{
	struct fdtable *new_fdt, *cur_fdt;

	spin_unlock(&files->file_lock);
	new_fdt = alloc_fdtable(nr);
	spin_lock(&files->file_lock);
	if (!new_fdt)
		return -ENOMEM;
	/*
	 * extremely unlikely race - sysctl_nr_open decreased between the check in
	 * caller and alloc_fdtable().  Cheaper to catch it here...
	 */
	if (unlikely(new_fdt->max_fds <= nr)) {
		__free_fdtable(new_fdt);
		return -EMFILE;
	}
	/*
	 * Check again since another task may have expanded the fd table while
	 * we dropped the lock
	 */
	cur_fdt = files_fdtable(files);
	if (nr >= cur_fdt->max_fds) {
		/* Continue as planned */
		copy_fdtable(new_fdt, cur_fdt);
		rcu_assign_pointer(files->fdt, new_fdt);
		if (cur_fdt != &files->fdtab)
			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
	} else {
		/* Somebody else expanded, so undo our attempt */
		__free_fdtable(new_fdt);
	}
	return 1;
}
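
/*
 * Note for callers of expand_fdtable()/expand_files(): a return of 1 means
 * ->file_lock was dropped and retaken while the table was reallocated, so
 * any cached files_fdtable() result is stale and the lookup must be redone
 * (__alloc_fd() below does this with its "goto repeat"); 0 means the table
 * was already large enough; <0 is an error.
 */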

/*
 * Expand files.
 * This function will expand the file structures, if the requested size exceeds
 * the current capacity and there is room for expansion.
 * Return <0 error code on error; 0 when nothing done; 1 when files were
 * expanded and execution may have blocked.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
static int expand_files(struct files_struct *files, int nr)
{
	struct fdtable *fdt;

	fdt = files_fdtable(files);

	/* Do we need to expand? */
	if (nr < fdt->max_fds)
		return 0;

	/* Can we expand? */
	if (nr >= sysctl_nr_open)
		return -EMFILE;

	/* All good, so we try */
	return expand_fdtable(files, nr);
}

static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
{
	__set_bit(fd, fdt->close_on_exec);
}

static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
{
	__clear_bit(fd, fdt->close_on_exec);
}

static inline void __set_open_fd(int fd, struct fdtable *fdt)
{
	__set_bit(fd, fdt->open_fds);
}

static inline void __clear_open_fd(int fd, struct fdtable *fdt)
{
	__clear_bit(fd, fdt->open_fds);
}

static int count_open_files(struct fdtable *fdt)
{
	int size = fdt->max_fds;
	int i;

	/* Find the last open fd */
	for (i = size / BITS_PER_LONG; i > 0; ) {
		if (fdt->open_fds[--i])
			break;
	}
	i = (i + 1) * BITS_PER_LONG;
	return i;
}

/*
 * Allocate a new files structure and copy contents from the
 * passed in files structure.
 * errorp will be valid only when the returned files_struct is NULL.
 */
struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
{
	struct files_struct *newf;
	struct file **old_fds, **new_fds;
	int open_files, size, i;
	struct fdtable *old_fdt, *new_fdt;

	*errorp = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
	if (!newf)
		goto out;

	atomic_set(&newf->count, 1);

	spin_lock_init(&newf->file_lock);
	newf->next_fd = 0;
	new_fdt = &newf->fdtab;
	new_fdt->max_fds = NR_OPEN_DEFAULT;
	new_fdt->close_on_exec = newf->close_on_exec_init;
	new_fdt->open_fds = newf->open_fds_init;
	new_fdt->fd = &newf->fd_array[0];

	spin_lock(&oldf->file_lock);
	old_fdt = files_fdtable(oldf);
	open_files = count_open_files(old_fdt);

	/*
	 * Check whether we need to allocate a larger fd array and fd set.
	 */
	while (unlikely(open_files > new_fdt->max_fds)) {
		spin_unlock(&oldf->file_lock);

		if (new_fdt != &newf->fdtab)
			__free_fdtable(new_fdt);

		new_fdt = alloc_fdtable(open_files - 1);
		if (!new_fdt) {
			*errorp = -ENOMEM;
			goto out_release;
		}

		/* beyond sysctl_nr_open; nothing to do */
		if (unlikely(new_fdt->max_fds < open_files)) {
			__free_fdtable(new_fdt);
			*errorp = -EMFILE;
			goto out_release;
		}

		/*
		 * Reacquire the oldf lock and a pointer to its fd table: the
		 * table may have grown again while we dropped the lock, so we
		 * need the latest pointer.
		 */
		spin_lock(&oldf->file_lock);
		old_fdt = files_fdtable(oldf);
		open_files = count_open_files(old_fdt);
	}

	old_fds = old_fdt->fd;
	new_fds = new_fdt->fd;

	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);

	for (i = open_files; i != 0; i--) {
		struct file *f = *old_fds++;
		if (f) {
			get_file(f);
		} else {
			/*
			 * The fd may be claimed in the fd bitmap but not yet
			 * instantiated in the files array if a sibling thread
			 * is partway through open().  So make sure that this
			 * fd is available to the new process.
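			 *
			 * In other words, the sibling has reserved the slot
			 * via __alloc_fd() but has not yet reached
			 * fd_install(), so we copied a set bit alongside a
			 * NULL file pointer; clear the bit in the copy.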
			 */
			__clear_open_fd(open_files - i, new_fdt);
		}
		rcu_assign_pointer(*new_fds++, f);
	}
	spin_unlock(&oldf->file_lock);

	/* compute the remainder to be cleared */
	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);

	/* This is long-word aligned, so an optimized memset could be used */
	memset(new_fds, 0, size);

	if (new_fdt->max_fds > open_files) {
		int left = (new_fdt->max_fds - open_files) / 8;
		int start = open_files / BITS_PER_LONG;

		memset(&new_fdt->open_fds[start], 0, left);
		memset(&new_fdt->close_on_exec[start], 0, left);
	}

	rcu_assign_pointer(newf->fdt, new_fdt);

	return newf;

out_release:
	kmem_cache_free(files_cachep, newf);
out:
	return NULL;
}

static struct fdtable *close_files(struct files_struct * files)
{
	/*
	 * It is safe to dereference the fd table without RCU or
	 * ->file_lock because this is the last reference to the
	 * files structure.
	 */
	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
	int i, j = 0;

	for (;;) {
		unsigned long set;
		i = j * BITS_PER_LONG;
		if (i >= fdt->max_fds)
			break;
		set = fdt->open_fds[j++];
		while (set) {
			if (set & 1) {
				struct file * file = xchg(&fdt->fd[i], NULL);
				if (file) {
					filp_close(file, files);
					cond_resched();
				}
			}
			i++;
			set >>= 1;
		}
	}

	return fdt;
}

struct files_struct *get_files_struct(struct task_struct *task)
{
	struct files_struct *files;

	task_lock(task);
	files = task->files;
	if (files)
		atomic_inc(&files->count);
	task_unlock(task);

	return files;
}

void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		struct fdtable *fdt = close_files(files);

		/* free the arrays if they are not embedded */
		if (fdt != &files->fdtab)
			__free_fdtable(fdt);
		kmem_cache_free(files_cachep, files);
	}
}

void reset_files_struct(struct files_struct *files)
{
	struct task_struct *tsk = current;
	struct files_struct *old;

	old = tsk->files;
	task_lock(tsk);
	tsk->files = files;
	task_unlock(tsk);
	put_files_struct(old);
}

void exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void __init files_defer_init(void)
{
	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
			     -BITS_PER_LONG;
}

struct files_struct init_files = {
	.count		= ATOMIC_INIT(1),
	.fdt		= &init_files.fdtab,
	.fdtab		= {
		.max_fds	= NR_OPEN_DEFAULT,
		.fd		= &init_files.fd_array[0],
		.close_on_exec	= init_files.close_on_exec_init,
		.open_fds	= init_files.open_fds_init,
	},
	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
};
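
/*
 * Allocation below scans open_fds for the first zero bit at or above
 * max(start, files->next_fd).  next_fd is only a hint for the lowest
 * possibly-free descriptor: __alloc_fd() advances it and __put_unused_fd()
 * pulls it back down, which is how e.g. close(0) followed by open() hands
 * out fd 0 again, per the usual lowest-available-descriptor rule.
 */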

/*
 * allocate a file descriptor, mark it busy.
 */
int __alloc_fd(struct files_struct *files,
	       unsigned start, unsigned end, unsigned flags)
{
	unsigned int fd;
	int error;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
	fdt = files_fdtable(files);
	fd = start;
	if (fd < files->next_fd)
		fd = files->next_fd;

	if (fd < fdt->max_fds)
		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
	error = -EMFILE;
	if (fd >= end)
		goto out;

	error = expand_files(files, fd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = fd + 1;

	__set_open_fd(fd, fdt);
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	error = fd;
#if 1
	/* Sanity check */
	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
		rcu_assign_pointer(fdt->fd[fd], NULL);
	}
#endif

out:
	spin_unlock(&files->file_lock);
	return error;
}

static int alloc_fd(unsigned start, unsigned flags)
{
	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}

int get_unused_fd_flags(unsigned flags)
{
	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);

static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
	struct fdtable *fdt = files_fdtable(files);
	__clear_open_fd(fd, fdt);
	if (fd < files->next_fd)
		files->next_fd = fd;
}

void put_unused_fd(unsigned int fd)
{
	struct files_struct *files = current->files;
	spin_lock(&files->file_lock);
	__put_unused_fd(files, fd);
	spin_unlock(&files->file_lock);
}

EXPORT_SYMBOL(put_unused_fd);

/*
 * Install a file pointer in the fd array.
 *
 * The VFS is full of places where we drop the files lock between
 * setting the open_fds bitmap and installing the file in the file
 * array.  At any such point, we are vulnerable to a dup2() race
 * installing a file in the array before us.  We need to detect this and
 * fput() the struct file we are about to overwrite in this case.
 *
 * It should never happen - if we allow dup2() to do it, _really_ bad things
 * will follow.
 *
 * NOTE: the __fd_install() variant is really, really low-level; don't
 * use it unless you are forced to by truly lousy API shoved down
 * your throat.  'files' *MUST* be either current->files or obtained
 * by get_files_struct(current) done by whoever had given it to you,
 * or really bad things will happen.  Normally you want to use
 * fd_install() instead.
 */

void __fd_install(struct files_struct *files, unsigned int fd,
		struct file *file)
{
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	BUG_ON(fdt->fd[fd] != NULL);
	rcu_assign_pointer(fdt->fd[fd], file);
	spin_unlock(&files->file_lock);
}

void fd_install(unsigned int fd, struct file *file)
{
	__fd_install(current->files, fd, file);
}

EXPORT_SYMBOL(fd_install);
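
/*
 * A minimal usage sketch, not part of this file: example_fops and priv are
 * placeholders, and anon_inode_getfile() merely stands in for anything that
 * produces a struct file.
 *
 *	fd = get_unused_fd_flags(O_CLOEXEC);
 *	if (fd < 0)
 *		return fd;
 *	file = anon_inode_getfile("[example]", &example_fops, priv, O_RDWR);
 *	if (IS_ERR(file)) {
 *		put_unused_fd(fd);
 *		return PTR_ERR(file);
 *	}
 *	fd_install(fd, file);
 *	return fd;
 *
 * The descriptor slot is reserved first and the file is published last via
 * fd_install(); a reserved but unused slot is released with put_unused_fd().
 */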

/*
 * The same warnings as for __alloc_fd()/__fd_install() apply here...
 */
int __close_fd(struct files_struct *files, unsigned fd)
{
	struct file *file;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (fd >= fdt->max_fds)
		goto out_unlock;
	file = fdt->fd[fd];
	if (!file)
		goto out_unlock;
	rcu_assign_pointer(fdt->fd[fd], NULL);
	__clear_close_on_exec(fd, fdt);
	__put_unused_fd(files, fd);
	spin_unlock(&files->file_lock);
	return filp_close(file, files);

out_unlock:
	spin_unlock(&files->file_lock);
	return -EBADF;
}

void do_close_on_exec(struct files_struct *files)
{
	unsigned i;
	struct fdtable *fdt;

	/* exec unshares first */
	spin_lock(&files->file_lock);
	for (i = 0; ; i++) {
		unsigned long set;
		unsigned fd = i * BITS_PER_LONG;
		fdt = files_fdtable(files);
		if (fd >= fdt->max_fds)
			break;
		set = fdt->close_on_exec[i];
		if (!set)
			continue;
		fdt->close_on_exec[i] = 0;
		for ( ; set ; fd++, set >>= 1) {
			struct file *file;
			if (!(set & 1))
				continue;
			file = fdt->fd[fd];
			if (!file)
				continue;
			rcu_assign_pointer(fdt->fd[fd], NULL);
			__put_unused_fd(files, fd);
			spin_unlock(&files->file_lock);
			filp_close(file, files);
			cond_resched();
			spin_lock(&files->file_lock);
		}

	}
	spin_unlock(&files->file_lock);
}

static struct file *__fget(unsigned int fd, fmode_t mask)
{
	struct files_struct *files = current->files;
	struct file *file;

	rcu_read_lock();
	file = fcheck_files(files, fd);
	if (file) {
		/* File object ref couldn't be taken */
		if ((file->f_mode & mask) ||
		    !atomic_long_inc_not_zero(&file->f_count))
			file = NULL;
	}
	rcu_read_unlock();

	return file;
}

struct file *fget(unsigned int fd)
{
	return __fget(fd, FMODE_PATH);
}
EXPORT_SYMBOL(fget);

struct file *fget_raw(unsigned int fd)
{
	return __fget(fd, 0);
}
EXPORT_SYMBOL(fget_raw);

/*
 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
 *
 * You can use this instead of fget if you satisfy all of the following
 * conditions:
 * 1) You must call fput_light before exiting the syscall and returning control
 *    to userspace (i.e. you cannot remember the returned struct file * after
 *    returning to userspace).
 * 2) You must not call filp_close on the returned struct file * in between
 *    calls to fget_light and fput_light.
 * 3) You must not clone the current task in between the calls to fget_light
 *    and fput_light.
 *
 * The fput_needed flag returned by fget_light should be passed to the
 * corresponding fput_light.
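 *
 * A typical caller (a sketch; do_something() is a placeholder) therefore
 * looks like:
 *
 *	int fput_needed, ret;
 *	struct file *file = fget_light(fd, &fput_needed);
 *
 *	if (!file)
 *		return -EBADF;
 *	ret = do_something(file);
 *	fput_light(file, fput_needed);
 *	return ret;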
 */
struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
{
	struct files_struct *files = current->files;
	struct file *file;

	*fput_needed = 0;
	if (atomic_read(&files->count) == 1) {
		file = __fcheck_files(files, fd);
		if (file && (file->f_mode & mask))
			file = NULL;
	} else {
		file = __fget(fd, mask);
		if (file)
			*fput_needed = 1;
	}

	return file;
}

struct file *fget_light(unsigned int fd, int *fput_needed)
{
	return __fget_light(fd, FMODE_PATH, fput_needed);
}
EXPORT_SYMBOL(fget_light);

struct file *fget_raw_light(unsigned int fd, int *fput_needed)
{
	return __fget_light(fd, 0, fput_needed);
}

void set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	spin_unlock(&files->file_lock);
}

bool get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	bool res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = close_on_exec(fd, fdt);
	rcu_read_unlock();
	return res;
}

static int do_dup2(struct files_struct *files,
	struct file *file, unsigned fd, unsigned flags)
{
	struct file *tofree;
	struct fdtable *fdt;

	/*
	 * We need to detect attempts to do dup2() over an allocated but still
	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
	 * extra work in their equivalent of fget() - they insert struct
	 * file immediately after grabbing descriptor, mark it larval if
	 * more work (e.g. actual opening) is needed and make sure that
	 * fget() treats larval files as absent.  Potentially interesting,
	 * but while extra work in fget() is trivial, locking implications
	 * and amount of surgery on open()-related paths in VFS are not.
	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
	 * scope of POSIX or SUS, since neither considers shared descriptor
	 * tables and this condition does not arise without those.
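	 *
	 * Here we take the cheap way out: if the slot is marked open in the
	 * bitmap but ->fd[fd] is still NULL, the descriptor is mid-open in
	 * another thread and the dup2() fails with -EBUSY (the Ebusy label
	 * below).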
	 */
	fdt = files_fdtable(files);
	tofree = fdt->fd[fd];
	if (!tofree && fd_is_open(fd, fdt))
		goto Ebusy;
	get_file(file);
	rcu_assign_pointer(fdt->fd[fd], file);
	__set_open_fd(fd, fdt);
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);

	return fd;

Ebusy:
	spin_unlock(&files->file_lock);
	return -EBUSY;
}

int replace_fd(unsigned fd, struct file *file, unsigned flags)
{
	int err;
	struct files_struct *files = current->files;

	if (!file)
		return __close_fd(files, fd);

	if (fd >= rlimit(RLIMIT_NOFILE))
		return -EBADF;

	spin_lock(&files->file_lock);
	err = expand_files(files, fd);
	if (unlikely(err < 0))
		goto out_unlock;
	return do_dup2(files, file, fd, flags);

out_unlock:
	spin_unlock(&files->file_lock);
	return err;
}

SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
{
	int err = -EBADF;
	struct file *file;
	struct files_struct *files = current->files;

	if ((flags & ~O_CLOEXEC) != 0)
		return -EINVAL;

	if (unlikely(oldfd == newfd))
		return -EINVAL;

	if (newfd >= rlimit(RLIMIT_NOFILE))
		return -EBADF;

	spin_lock(&files->file_lock);
	err = expand_files(files, newfd);
	file = fcheck(oldfd);
	if (unlikely(!file))
		goto Ebadf;
	if (unlikely(err < 0)) {
		if (err == -EMFILE)
			goto Ebadf;
		goto out_unlock;
	}
	return do_dup2(files, file, newfd, flags);

Ebadf:
	err = -EBADF;
out_unlock:
	spin_unlock(&files->file_lock);
	return err;
}

SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
	if (unlikely(newfd == oldfd)) { /* corner case */
		struct files_struct *files = current->files;
		int retval = oldfd;

		rcu_read_lock();
		if (!fcheck_files(files, oldfd))
			retval = -EBADF;
		rcu_read_unlock();
		return retval;
	}
	return sys_dup3(oldfd, newfd, 0);
}

SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
	int ret = -EBADF;
	struct file *file = fget_raw(fildes);

	if (file) {
		ret = get_unused_fd();
		if (ret >= 0)
			fd_install(ret, file);
		else
			fput(file);
	}
	return ret;
}

int f_dupfd(unsigned int from, struct file *file, unsigned flags)
{
	int err;
	if (from >= rlimit(RLIMIT_NOFILE))
		return -EINVAL;
	err = alloc_fd(from, flags);
	if (err >= 0) {
		get_file(file);
		fd_install(err, file);
	}
	return err;
}

int iterate_fd(struct files_struct *files, unsigned n,
		int (*f)(const void *, struct file *, unsigned),
		const void *p)
{
	struct fdtable *fdt;
	int res = 0;
	if (!files)
		return 0;
	spin_lock(&files->file_lock);
	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
		struct file *file;
		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
		if (!file)
			continue;
		res = f(p, file, n);
		if (res)
			break;
	}
	spin_unlock(&files->file_lock);
	return res;
}
EXPORT_SYMBOL(iterate_fd);