1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * linux/fs/file.c
41da177e4SLinus Torvalds *
51da177e4SLinus Torvalds * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Manage the dynamic fd arrays in the process files_struct.
81da177e4SLinus Torvalds */
91da177e4SLinus Torvalds
10fe17f22dSAl Viro #include <linux/syscalls.h>
11630d9c47SPaul Gortmaker #include <linux/export.h>
121da177e4SLinus Torvalds #include <linux/fs.h>
13278a5fbaSChristian Brauner #include <linux/kernel.h>
141da177e4SLinus Torvalds #include <linux/mm.h>
153f07c014SIngo Molnar #include <linux/sched/signal.h>
161da177e4SLinus Torvalds #include <linux/slab.h>
171da177e4SLinus Torvalds #include <linux/file.h>
189f3acc31SAl Viro #include <linux/fdtable.h>
191da177e4SLinus Torvalds #include <linux/bitops.h>
20ab2af1f5SDipankar Sarma #include <linux/spinlock.h>
21ab2af1f5SDipankar Sarma #include <linux/rcupdate.h>
2260997c3dSChristian Brauner #include <linux/close_range.h>
2366590610SKees Cook #include <net/sock.h>
24*62861a5dSZhang Kunbo #include <linux/init_task.h>
25ab2af1f5SDipankar Sarma
2653dec2eaSJens Axboe #include "internal.h"
2753dec2eaSJens Axboe
289b80a184SAlexey Dobriyan unsigned int sysctl_nr_open __read_mostly = 1024*1024;
299b80a184SAlexey Dobriyan unsigned int sysctl_nr_open_min = BITS_PER_LONG;
30752343beSRasmus Villemoes /* our min() is unusable in constant expressions ;-/ */
31752343beSRasmus Villemoes #define __const_min(x, y) ((x) < (y) ? (x) : (y))
329b80a184SAlexey Dobriyan unsigned int sysctl_nr_open_max =
339b80a184SAlexey Dobriyan __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
349cfe015aSEric Dumazet
__free_fdtable(struct fdtable * fdt)35a892e2d7SChangli Gao static void __free_fdtable(struct fdtable *fdt)
361da177e4SLinus Torvalds {
37f6c0a192SAl Viro kvfree(fdt->fd);
38f6c0a192SAl Viro kvfree(fdt->open_fds);
39a892e2d7SChangli Gao kfree(fdt);
40ab2af1f5SDipankar Sarma }
41ab2af1f5SDipankar Sarma
free_fdtable_rcu(struct rcu_head * rcu)427cf4dc3cSAl Viro static void free_fdtable_rcu(struct rcu_head *rcu)
43ab2af1f5SDipankar Sarma {
44ac3e3c5bSAl Viro __free_fdtable(container_of(rcu, struct fdtable, rcu));
45ab2af1f5SDipankar Sarma }
46ab2af1f5SDipankar Sarma
/* Longs needed for a bitmap that has one bit per long of an nr-bit bitmap. */
#define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
/* Same quantity, in bytes. */
#define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))

/* Number of words in ->open_fds for the given table. */
#define fdt_words(fdt)	((fdt)->max_fds / BITS_PER_LONG)
511da177e4SLinus Torvalds /*
52ea5c58e7SEric Biggers * Copy 'count' fd bits from the old table to the new table and clear the extra
53ea5c58e7SEric Biggers * space if any. This does not copy the file pointers. Called with the files
54ea5c58e7SEric Biggers * spinlock held for write.
55ea5c58e7SEric Biggers */
copy_fd_bitmaps(struct fdtable * nfdt,struct fdtable * ofdt,unsigned int copy_words)56dd72ae8bSAl Viro static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
57dd72ae8bSAl Viro unsigned int copy_words)
58ea5c58e7SEric Biggers {
59dd72ae8bSAl Viro unsigned int nwords = fdt_words(nfdt);
60ea5c58e7SEric Biggers
61dd72ae8bSAl Viro bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
62dd72ae8bSAl Viro copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
63dd72ae8bSAl Viro bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
64dd72ae8bSAl Viro copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
65dd72ae8bSAl Viro bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
66dd72ae8bSAl Viro copy_words, nwords);
67ea5c58e7SEric Biggers }
68ea5c58e7SEric Biggers
69ea5c58e7SEric Biggers /*
70ea5c58e7SEric Biggers * Copy all file descriptors from the old table to the new, expanded table and
71ea5c58e7SEric Biggers * clear the extra space. Called with the files spinlock held for write.
721da177e4SLinus Torvalds */
copy_fdtable(struct fdtable * nfdt,struct fdtable * ofdt)735466b456SVadim Lobanov static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
74ab2af1f5SDipankar Sarma {
754e89b721SAl Viro size_t cpy, set;
761da177e4SLinus Torvalds
775466b456SVadim Lobanov BUG_ON(nfdt->max_fds < ofdt->max_fds);
785466b456SVadim Lobanov
795466b456SVadim Lobanov cpy = ofdt->max_fds * sizeof(struct file *);
805466b456SVadim Lobanov set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
815466b456SVadim Lobanov memcpy(nfdt->fd, ofdt->fd, cpy);
82ea5c58e7SEric Biggers memset((char *)nfdt->fd + cpy, 0, set);
835466b456SVadim Lobanov
84dd72ae8bSAl Viro copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
851da177e4SLinus Torvalds }
861da177e4SLinus Torvalds
871c24a186SLinus Torvalds /*
881c24a186SLinus Torvalds * Note how the fdtable bitmap allocations very much have to be a multiple of
891c24a186SLinus Torvalds * BITS_PER_LONG. This is not only because we walk those things in chunks of
901c24a186SLinus Torvalds * 'unsigned long' in some places, but simply because that is how the Linux
911c24a186SLinus Torvalds * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
921c24a186SLinus Torvalds * they are very much "bits in an array of unsigned long".
931c24a186SLinus Torvalds *
941c24a186SLinus Torvalds * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
951c24a186SLinus Torvalds * by that "1024/sizeof(ptr)" before, we already know there are sufficient
961c24a186SLinus Torvalds * clear low bits. Clang seems to realize that, gcc ends up being confused.
971c24a186SLinus Torvalds *
981c24a186SLinus Torvalds * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
991c24a186SLinus Torvalds * let's consider it documentation (and maybe a test-case for gcc to improve
1001c24a186SLinus Torvalds * its code generation ;)
1011c24a186SLinus Torvalds */
alloc_fdtable(unsigned int nr)1025466b456SVadim Lobanov static struct fdtable * alloc_fdtable(unsigned int nr)
1031da177e4SLinus Torvalds {
1045466b456SVadim Lobanov struct fdtable *fdt;
1051fd36adcSDavid Howells void *data;
1061da177e4SLinus Torvalds
1075466b456SVadim Lobanov /*
1085466b456SVadim Lobanov * Figure out how many fds we actually want to support in this fdtable.
1095466b456SVadim Lobanov * Allocation steps are keyed to the size of the fdarray, since it
1105466b456SVadim Lobanov * grows far faster than any of the other dynamic data. We try to fit
1115466b456SVadim Lobanov * the fdarray into comfortable page-tuned chunks: starting at 1024B
1125466b456SVadim Lobanov * and growing in powers of two from there on.
1135466b456SVadim Lobanov */
1145466b456SVadim Lobanov nr /= (1024 / sizeof(struct file *));
1155466b456SVadim Lobanov nr = roundup_pow_of_two(nr + 1);
1165466b456SVadim Lobanov nr *= (1024 / sizeof(struct file *));
1171c24a186SLinus Torvalds nr = ALIGN(nr, BITS_PER_LONG);
1185c598b34SAl Viro /*
1195c598b34SAl Viro * Note that this can drive nr *below* what we had passed if sysctl_nr_open
1205c598b34SAl Viro * had been set lower between the check in expand_files() and here. Deal
1215c598b34SAl Viro * with that in caller, it's cheaper that way.
1225c598b34SAl Viro *
1235c598b34SAl Viro * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
1245c598b34SAl Viro * bitmaps handling below becomes unpleasant, to put it mildly...
1255c598b34SAl Viro */
1265c598b34SAl Viro if (unlikely(nr > sysctl_nr_open))
1275c598b34SAl Viro nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
1285466b456SVadim Lobanov
1295d097056SVladimir Davydov fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
130ab2af1f5SDipankar Sarma if (!fdt)
1311da177e4SLinus Torvalds goto out;
1325466b456SVadim Lobanov fdt->max_fds = nr;
133c823bd92SMichal Hocko data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
1345466b456SVadim Lobanov if (!data)
1355466b456SVadim Lobanov goto out_fdt;
1361fd36adcSDavid Howells fdt->fd = data;
1371fd36adcSDavid Howells
138c823bd92SMichal Hocko data = kvmalloc(max_t(size_t,
139c823bd92SMichal Hocko 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
140c823bd92SMichal Hocko GFP_KERNEL_ACCOUNT);
1415466b456SVadim Lobanov if (!data)
1425466b456SVadim Lobanov goto out_arr;
1431fd36adcSDavid Howells fdt->open_fds = data;
1445466b456SVadim Lobanov data += nr / BITS_PER_BYTE;
1451fd36adcSDavid Howells fdt->close_on_exec = data;
146f3f86e33SLinus Torvalds data += nr / BITS_PER_BYTE;
147f3f86e33SLinus Torvalds fdt->full_fds_bits = data;
1481da177e4SLinus Torvalds
149ab2af1f5SDipankar Sarma return fdt;
1505466b456SVadim Lobanov
1515466b456SVadim Lobanov out_arr:
152f6c0a192SAl Viro kvfree(fdt->fd);
1535466b456SVadim Lobanov out_fdt:
154ab2af1f5SDipankar Sarma kfree(fdt);
1555466b456SVadim Lobanov out:
156ab2af1f5SDipankar Sarma return NULL;
157ab2af1f5SDipankar Sarma }
158ab2af1f5SDipankar Sarma
159ab2af1f5SDipankar Sarma /*
16074d392aaSVadim Lobanov * Expand the file descriptor table.
16174d392aaSVadim Lobanov * This function will allocate a new fdtable and both fd array and fdset, of
16274d392aaSVadim Lobanov * the given size.
16374d392aaSVadim Lobanov * Return <0 error code on error; 1 on successful completion.
16474d392aaSVadim Lobanov * The files->file_lock should be held on entry, and will be held on exit.
165ab2af1f5SDipankar Sarma */
expand_fdtable(struct files_struct * files,unsigned int nr)1669b80a184SAlexey Dobriyan static int expand_fdtable(struct files_struct *files, unsigned int nr)
167ab2af1f5SDipankar Sarma __releases(files->file_lock)
168ab2af1f5SDipankar Sarma __acquires(files->file_lock)
169ab2af1f5SDipankar Sarma {
17074d392aaSVadim Lobanov struct fdtable *new_fdt, *cur_fdt;
171ab2af1f5SDipankar Sarma
172ab2af1f5SDipankar Sarma spin_unlock(&files->file_lock);
17374d392aaSVadim Lobanov new_fdt = alloc_fdtable(nr);
1748a81252bSEric Dumazet
175d74ba04dSEric W. Biederman /* make sure all fd_install() have seen resize_in_progress
1768a81252bSEric Dumazet * or have finished their rcu_read_lock_sched() section.
1778a81252bSEric Dumazet */
1788a81252bSEric Dumazet if (atomic_read(&files->count) > 1)
179c93ffc15SPaul E. McKenney synchronize_rcu();
1808a81252bSEric Dumazet
1811da177e4SLinus Torvalds spin_lock(&files->file_lock);
18274d392aaSVadim Lobanov if (!new_fdt)
18374d392aaSVadim Lobanov return -ENOMEM;
184ab2af1f5SDipankar Sarma /*
1855c598b34SAl Viro * extremely unlikely race - sysctl_nr_open decreased between the check in
1865c598b34SAl Viro * caller and alloc_fdtable(). Cheaper to catch it here...
1875c598b34SAl Viro */
1885c598b34SAl Viro if (unlikely(new_fdt->max_fds <= nr)) {
189a892e2d7SChangli Gao __free_fdtable(new_fdt);
1905c598b34SAl Viro return -EMFILE;
1915c598b34SAl Viro }
19274d392aaSVadim Lobanov cur_fdt = files_fdtable(files);
1938a81252bSEric Dumazet BUG_ON(nr < cur_fdt->max_fds);
19474d392aaSVadim Lobanov copy_fdtable(new_fdt, cur_fdt);
19574d392aaSVadim Lobanov rcu_assign_pointer(files->fdt, new_fdt);
196ac3e3c5bSAl Viro if (cur_fdt != &files->fdtab)
1971983e781SAl Viro call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
198d74ba04dSEric W. Biederman /* coupled with smp_rmb() in fd_install() */
1998a81252bSEric Dumazet smp_wmb();
20074d392aaSVadim Lobanov return 1;
2011da177e4SLinus Torvalds }
2021da177e4SLinus Torvalds
2031da177e4SLinus Torvalds /*
2041da177e4SLinus Torvalds * Expand files.
20574d392aaSVadim Lobanov * This function will expand the file structures, if the requested size exceeds
20674d392aaSVadim Lobanov * the current capacity and there is room for expansion.
20774d392aaSVadim Lobanov * Return <0 error code on error; 0 when nothing done; 1 when files were
20874d392aaSVadim Lobanov * expanded and execution may have blocked.
20974d392aaSVadim Lobanov * The files->file_lock should be held on entry, and will be held on exit.
2101da177e4SLinus Torvalds */
expand_files(struct files_struct * files,unsigned int nr)2119b80a184SAlexey Dobriyan static int expand_files(struct files_struct *files, unsigned int nr)
2128a81252bSEric Dumazet __releases(files->file_lock)
2138a81252bSEric Dumazet __acquires(files->file_lock)
2141da177e4SLinus Torvalds {
215badf1662SDipankar Sarma struct fdtable *fdt;
2168a81252bSEric Dumazet int expanded = 0;
2171da177e4SLinus Torvalds
2188a81252bSEric Dumazet repeat:
219badf1662SDipankar Sarma fdt = files_fdtable(files);
2204e1e018eSAl Viro
22174d392aaSVadim Lobanov /* Do we need to expand? */
222bbea9f69SVadim Lobanov if (nr < fdt->max_fds)
2238a81252bSEric Dumazet return expanded;
2244e1e018eSAl Viro
22574d392aaSVadim Lobanov /* Can we expand? */
2269cfe015aSEric Dumazet if (nr >= sysctl_nr_open)
22774d392aaSVadim Lobanov return -EMFILE;
22874d392aaSVadim Lobanov
2298a81252bSEric Dumazet if (unlikely(files->resize_in_progress)) {
2308a81252bSEric Dumazet spin_unlock(&files->file_lock);
2318a81252bSEric Dumazet expanded = 1;
2328a81252bSEric Dumazet wait_event(files->resize_wait, !files->resize_in_progress);
2338a81252bSEric Dumazet spin_lock(&files->file_lock);
2348a81252bSEric Dumazet goto repeat;
2358a81252bSEric Dumazet }
2368a81252bSEric Dumazet
23774d392aaSVadim Lobanov /* All good, so we try */
2388a81252bSEric Dumazet files->resize_in_progress = true;
2398a81252bSEric Dumazet expanded = expand_fdtable(files, nr);
2408a81252bSEric Dumazet files->resize_in_progress = false;
2418a81252bSEric Dumazet
2428a81252bSEric Dumazet wake_up_all(&files->resize_wait);
2438a81252bSEric Dumazet return expanded;
2441da177e4SLinus Torvalds }
245ab2af1f5SDipankar Sarma
__set_close_on_exec(unsigned int fd,struct fdtable * fdt)2469b80a184SAlexey Dobriyan static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
247b8318b01SAl Viro {
248b8318b01SAl Viro __set_bit(fd, fdt->close_on_exec);
249b8318b01SAl Viro }
250b8318b01SAl Viro
__clear_close_on_exec(unsigned int fd,struct fdtable * fdt)2519b80a184SAlexey Dobriyan static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
252b8318b01SAl Viro {
253fc90888dSLinus Torvalds if (test_bit(fd, fdt->close_on_exec))
254b8318b01SAl Viro __clear_bit(fd, fdt->close_on_exec);
255b8318b01SAl Viro }
256b8318b01SAl Viro
__set_open_fd(unsigned int fd,struct fdtable * fdt)257f3f86e33SLinus Torvalds static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
258b8318b01SAl Viro {
259b8318b01SAl Viro __set_bit(fd, fdt->open_fds);
260f3f86e33SLinus Torvalds fd /= BITS_PER_LONG;
261f3f86e33SLinus Torvalds if (!~fdt->open_fds[fd])
262f3f86e33SLinus Torvalds __set_bit(fd, fdt->full_fds_bits);
263b8318b01SAl Viro }
264b8318b01SAl Viro
__clear_open_fd(unsigned int fd,struct fdtable * fdt)265f3f86e33SLinus Torvalds static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
266b8318b01SAl Viro {
267b8318b01SAl Viro __clear_bit(fd, fdt->open_fds);
268f3f86e33SLinus Torvalds __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
269b8318b01SAl Viro }
270b8318b01SAl Viro
2711c24a186SLinus Torvalds /*
2721c24a186SLinus Torvalds * Note that a sane fdtable size always has to be a multiple of
2731c24a186SLinus Torvalds * BITS_PER_LONG, since we have bitmaps that are sized by this.
2741c24a186SLinus Torvalds *
275a8023f8bSAl Viro * punch_hole is optional - when close_range() is asked to unshare
276a8023f8bSAl Viro * and close, we don't need to copy descriptors in that range, so
277a8023f8bSAl Viro * a smaller cloned descriptor table might suffice if the last
278a8023f8bSAl Viro * currently opened descriptor falls into that range.
2791c24a186SLinus Torvalds */
sane_fdtable_size(struct fdtable * fdt,struct fd_range * punch_hole)280a8023f8bSAl Viro static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole)
28160997c3dSChristian Brauner {
282a8023f8bSAl Viro unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds);
28360997c3dSChristian Brauner
284a8023f8bSAl Viro if (last == fdt->max_fds)
285a8023f8bSAl Viro return NR_OPEN_DEFAULT;
286a8023f8bSAl Viro if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
287a8023f8bSAl Viro last = find_last_bit(fdt->open_fds, punch_hole->from);
288a8023f8bSAl Viro if (last == punch_hole->from)
289a8023f8bSAl Viro return NR_OPEN_DEFAULT;
290a8023f8bSAl Viro }
291a8023f8bSAl Viro return ALIGN(last + 1, BITS_PER_LONG);
29260997c3dSChristian Brauner }
29360997c3dSChristian Brauner
29402afc626SAl Viro /*
295a8023f8bSAl Viro * Allocate a new descriptor table and copy contents from the passed in
296a8023f8bSAl Viro * instance. Returns a pointer to cloned table on success, ERR_PTR()
297a8023f8bSAl Viro * on failure. For 'punch_hole' see sane_fdtable_size().
29802afc626SAl Viro */
dup_fd(struct files_struct * oldf,struct fd_range * punch_hole)299a8023f8bSAl Viro struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole)
30002afc626SAl Viro {
30102afc626SAl Viro struct files_struct *newf;
30202afc626SAl Viro struct file **old_fds, **new_fds;
3039b80a184SAlexey Dobriyan unsigned int open_files, i;
30402afc626SAl Viro struct fdtable *old_fdt, *new_fdt;
305a8023f8bSAl Viro int error;
30602afc626SAl Viro
307afbec7ffSAl Viro newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
30802afc626SAl Viro if (!newf)
309a8023f8bSAl Viro return ERR_PTR(-ENOMEM);
31002afc626SAl Viro
311afbec7ffSAl Viro atomic_set(&newf->count, 1);
312afbec7ffSAl Viro
313afbec7ffSAl Viro spin_lock_init(&newf->file_lock);
3148a81252bSEric Dumazet newf->resize_in_progress = false;
3158a81252bSEric Dumazet init_waitqueue_head(&newf->resize_wait);
316afbec7ffSAl Viro newf->next_fd = 0;
317afbec7ffSAl Viro new_fdt = &newf->fdtab;
318afbec7ffSAl Viro new_fdt->max_fds = NR_OPEN_DEFAULT;
3191fd36adcSDavid Howells new_fdt->close_on_exec = newf->close_on_exec_init;
3201fd36adcSDavid Howells new_fdt->open_fds = newf->open_fds_init;
321f3f86e33SLinus Torvalds new_fdt->full_fds_bits = newf->full_fds_bits_init;
322afbec7ffSAl Viro new_fdt->fd = &newf->fd_array[0];
323afbec7ffSAl Viro
32402afc626SAl Viro spin_lock(&oldf->file_lock);
32502afc626SAl Viro old_fdt = files_fdtable(oldf);
326a8023f8bSAl Viro open_files = sane_fdtable_size(old_fdt, punch_hole);
32702afc626SAl Viro
32802afc626SAl Viro /*
32902afc626SAl Viro * Check whether we need to allocate a larger fd array and fd set.
33002afc626SAl Viro */
331adbecb12SAl Viro while (unlikely(open_files > new_fdt->max_fds)) {
33202afc626SAl Viro spin_unlock(&oldf->file_lock);
3339dec3c4dSAl Viro
334a892e2d7SChangli Gao if (new_fdt != &newf->fdtab)
335a892e2d7SChangli Gao __free_fdtable(new_fdt);
336adbecb12SAl Viro
3379dec3c4dSAl Viro new_fdt = alloc_fdtable(open_files - 1);
3389dec3c4dSAl Viro if (!new_fdt) {
339a8023f8bSAl Viro error = -ENOMEM;
34002afc626SAl Viro goto out_release;
3419dec3c4dSAl Viro }
3429dec3c4dSAl Viro
3439dec3c4dSAl Viro /* beyond sysctl_nr_open; nothing to do */
3449dec3c4dSAl Viro if (unlikely(new_fdt->max_fds < open_files)) {
345a892e2d7SChangli Gao __free_fdtable(new_fdt);
346a8023f8bSAl Viro error = -EMFILE;
3479dec3c4dSAl Viro goto out_release;
3489dec3c4dSAl Viro }
3499dec3c4dSAl Viro
35002afc626SAl Viro /*
35102afc626SAl Viro * Reacquire the oldf lock and a pointer to its fd table
35202afc626SAl Viro * who knows it may have a new bigger fd table. We need
35302afc626SAl Viro * the latest pointer.
35402afc626SAl Viro */
35502afc626SAl Viro spin_lock(&oldf->file_lock);
35602afc626SAl Viro old_fdt = files_fdtable(oldf);
357a8023f8bSAl Viro open_files = sane_fdtable_size(old_fdt, punch_hole);
35802afc626SAl Viro }
35902afc626SAl Viro
360dd72ae8bSAl Viro copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
361ea5c58e7SEric Biggers
36202afc626SAl Viro old_fds = old_fdt->fd;
36302afc626SAl Viro new_fds = new_fdt->fd;
36402afc626SAl Viro
36502afc626SAl Viro for (i = open_files; i != 0; i--) {
36602afc626SAl Viro struct file *f = *old_fds++;
36702afc626SAl Viro if (f) {
36802afc626SAl Viro get_file(f);
36902afc626SAl Viro } else {
37002afc626SAl Viro /*
37102afc626SAl Viro * The fd may be claimed in the fd bitmap but not yet
37202afc626SAl Viro * instantiated in the files array if a sibling thread
37302afc626SAl Viro * is partway through open(). So make sure that this
37402afc626SAl Viro * fd is available to the new process.
37502afc626SAl Viro */
3761dce27c5SDavid Howells __clear_open_fd(open_files - i, new_fdt);
37702afc626SAl Viro }
37802afc626SAl Viro rcu_assign_pointer(*new_fds++, f);
37902afc626SAl Viro }
38002afc626SAl Viro spin_unlock(&oldf->file_lock);
38102afc626SAl Viro
382ea5c58e7SEric Biggers /* clear the remainder */
383ea5c58e7SEric Biggers memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
38402afc626SAl Viro
385afbec7ffSAl Viro rcu_assign_pointer(newf->fdt, new_fdt);
386afbec7ffSAl Viro
38702afc626SAl Viro return newf;
38802afc626SAl Viro
38902afc626SAl Viro out_release:
39002afc626SAl Viro kmem_cache_free(files_cachep, newf);
391a8023f8bSAl Viro return ERR_PTR(error);
39202afc626SAl Viro }
39302afc626SAl Viro
close_files(struct files_struct * files)394ce08b62dSOleg Nesterov static struct fdtable *close_files(struct files_struct * files)
3957cf4dc3cSAl Viro {
3967cf4dc3cSAl Viro /*
3977cf4dc3cSAl Viro * It is safe to dereference the fd table without RCU or
3987cf4dc3cSAl Viro * ->file_lock because this is the last reference to the
399ce08b62dSOleg Nesterov * files structure.
4007cf4dc3cSAl Viro */
401ce08b62dSOleg Nesterov struct fdtable *fdt = rcu_dereference_raw(files->fdt);
4029b80a184SAlexey Dobriyan unsigned int i, j = 0;
403ce08b62dSOleg Nesterov
4047cf4dc3cSAl Viro for (;;) {
4057cf4dc3cSAl Viro unsigned long set;
4067cf4dc3cSAl Viro i = j * BITS_PER_LONG;
4077cf4dc3cSAl Viro if (i >= fdt->max_fds)
4087cf4dc3cSAl Viro break;
4097cf4dc3cSAl Viro set = fdt->open_fds[j++];
4107cf4dc3cSAl Viro while (set) {
4117cf4dc3cSAl Viro if (set & 1) {
4127cf4dc3cSAl Viro struct file * file = xchg(&fdt->fd[i], NULL);
4137cf4dc3cSAl Viro if (file) {
4147cf4dc3cSAl Viro filp_close(file, files);
415388a4c88SPaul E. McKenney cond_resched();
4167cf4dc3cSAl Viro }
4177cf4dc3cSAl Viro }
4187cf4dc3cSAl Viro i++;
4197cf4dc3cSAl Viro set >>= 1;
4207cf4dc3cSAl Viro }
4217cf4dc3cSAl Viro }
422ce08b62dSOleg Nesterov
423ce08b62dSOleg Nesterov return fdt;
4247cf4dc3cSAl Viro }
4257cf4dc3cSAl Viro
put_files_struct(struct files_struct * files)4267cf4dc3cSAl Viro void put_files_struct(struct files_struct *files)
4277cf4dc3cSAl Viro {
4287cf4dc3cSAl Viro if (atomic_dec_and_test(&files->count)) {
429ce08b62dSOleg Nesterov struct fdtable *fdt = close_files(files);
430ce08b62dSOleg Nesterov
431b9e02af0SAl Viro /* free the arrays if they are not embedded */
432b9e02af0SAl Viro if (fdt != &files->fdtab)
433b9e02af0SAl Viro __free_fdtable(fdt);
434b9e02af0SAl Viro kmem_cache_free(files_cachep, files);
4357cf4dc3cSAl Viro }
4367cf4dc3cSAl Viro }
4377cf4dc3cSAl Viro
exit_files(struct task_struct * tsk)4387cf4dc3cSAl Viro void exit_files(struct task_struct *tsk)
4397cf4dc3cSAl Viro {
4407cf4dc3cSAl Viro struct files_struct * files = tsk->files;
4417cf4dc3cSAl Viro
4427cf4dc3cSAl Viro if (files) {
4437cf4dc3cSAl Viro task_lock(tsk);
4447cf4dc3cSAl Viro tsk->files = NULL;
4457cf4dc3cSAl Viro task_unlock(tsk);
4467cf4dc3cSAl Viro put_files_struct(files);
4477cf4dc3cSAl Viro }
4487cf4dc3cSAl Viro }
4497cf4dc3cSAl Viro
450f52111b1SAl Viro struct files_struct init_files = {
451f52111b1SAl Viro .count = ATOMIC_INIT(1),
452f52111b1SAl Viro .fdt = &init_files.fdtab,
453f52111b1SAl Viro .fdtab = {
454f52111b1SAl Viro .max_fds = NR_OPEN_DEFAULT,
455f52111b1SAl Viro .fd = &init_files.fd_array[0],
4561fd36adcSDavid Howells .close_on_exec = init_files.close_on_exec_init,
4571fd36adcSDavid Howells .open_fds = init_files.open_fds_init,
458f3f86e33SLinus Torvalds .full_fds_bits = init_files.full_fds_bits_init,
459f52111b1SAl Viro },
460eece09ecSThomas Gleixner .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
4615704a068SShuriyc Chu .resize_wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_files.resize_wait),
462f52111b1SAl Viro };
4631027abe8SAl Viro
find_next_fd(struct fdtable * fdt,unsigned int start)4649b80a184SAlexey Dobriyan static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
465f3f86e33SLinus Torvalds {
466bd56b910SYuntao Wang unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */
4679b80a184SAlexey Dobriyan unsigned int maxbit = maxfd / BITS_PER_LONG;
4689b80a184SAlexey Dobriyan unsigned int bitbit = start / BITS_PER_LONG;
469f3f86e33SLinus Torvalds
470f3f86e33SLinus Torvalds bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
471bd56b910SYuntao Wang if (bitbit >= maxfd)
472f3f86e33SLinus Torvalds return maxfd;
473f3f86e33SLinus Torvalds if (bitbit > start)
474f3f86e33SLinus Torvalds start = bitbit;
475f3f86e33SLinus Torvalds return find_next_zero_bit(fdt->open_fds, maxfd, start);
476f3f86e33SLinus Torvalds }
477f3f86e33SLinus Torvalds
4781027abe8SAl Viro /*
4791027abe8SAl Viro * allocate a file descriptor, mark it busy.
4801027abe8SAl Viro */
alloc_fd(unsigned start,unsigned end,unsigned flags)481aa384d10SEric W. Biederman static int alloc_fd(unsigned start, unsigned end, unsigned flags)
4821027abe8SAl Viro {
483aa384d10SEric W. Biederman struct files_struct *files = current->files;
4841027abe8SAl Viro unsigned int fd;
4851027abe8SAl Viro int error;
4861027abe8SAl Viro struct fdtable *fdt;
4871027abe8SAl Viro
4881027abe8SAl Viro spin_lock(&files->file_lock);
4891027abe8SAl Viro repeat:
4901027abe8SAl Viro fdt = files_fdtable(files);
4911027abe8SAl Viro fd = start;
4921027abe8SAl Viro if (fd < files->next_fd)
4931027abe8SAl Viro fd = files->next_fd;
4941027abe8SAl Viro
4951027abe8SAl Viro if (fd < fdt->max_fds)
496f3f86e33SLinus Torvalds fd = find_next_fd(fdt, fd);
4971027abe8SAl Viro
498f33ff992SAl Viro /*
499f33ff992SAl Viro * N.B. For clone tasks sharing a files structure, this test
500f33ff992SAl Viro * will limit the total number of files that can be opened.
501f33ff992SAl Viro */
502f33ff992SAl Viro error = -EMFILE;
503f33ff992SAl Viro if (fd >= end)
504f33ff992SAl Viro goto out;
505f33ff992SAl Viro
5061027abe8SAl Viro error = expand_files(files, fd);
5071027abe8SAl Viro if (error < 0)
5081027abe8SAl Viro goto out;
5091027abe8SAl Viro
5101027abe8SAl Viro /*
5111027abe8SAl Viro * If we needed to expand the fs array we
5121027abe8SAl Viro * might have blocked - try again.
5131027abe8SAl Viro */
5141027abe8SAl Viro if (error)
5151027abe8SAl Viro goto repeat;
5161027abe8SAl Viro
5171027abe8SAl Viro if (start <= files->next_fd)
5181027abe8SAl Viro files->next_fd = fd + 1;
5191027abe8SAl Viro
5201dce27c5SDavid Howells __set_open_fd(fd, fdt);
5211027abe8SAl Viro if (flags & O_CLOEXEC)
5221dce27c5SDavid Howells __set_close_on_exec(fd, fdt);
5231027abe8SAl Viro else
5241dce27c5SDavid Howells __clear_close_on_exec(fd, fdt);
5251027abe8SAl Viro error = fd;
5261027abe8SAl Viro #if 1
5271027abe8SAl Viro /* Sanity check */
528add1f099SPaul E. McKenney if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
5291027abe8SAl Viro printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
5301027abe8SAl Viro rcu_assign_pointer(fdt->fd[fd], NULL);
5311027abe8SAl Viro }
5321027abe8SAl Viro #endif
5331027abe8SAl Viro
5341027abe8SAl Viro out:
5351027abe8SAl Viro spin_unlock(&files->file_lock);
5361027abe8SAl Viro return error;
5371027abe8SAl Viro }
5381027abe8SAl Viro
/*
 * Allocate the lowest available descriptor below @nofile for the current
 * task; @flags is handed on to alloc_fd().
 */
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
	const unsigned int lowest = 0;

	return alloc_fd(lowest, nofile, flags);
}
5434022e7afSJens Axboe
get_unused_fd_flags(unsigned flags)5441a7bd226SAl Viro int get_unused_fd_flags(unsigned flags)
5451027abe8SAl Viro {
5464022e7afSJens Axboe return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
5471027abe8SAl Viro }
5481a7bd226SAl Viro EXPORT_SYMBOL(get_unused_fd_flags);
54956007caeSAl Viro
__put_unused_fd(struct files_struct * files,unsigned int fd)55056007caeSAl Viro static void __put_unused_fd(struct files_struct *files, unsigned int fd)
55156007caeSAl Viro {
55256007caeSAl Viro struct fdtable *fdt = files_fdtable(files);
55356007caeSAl Viro __clear_open_fd(fd, fdt);
55456007caeSAl Viro if (fd < files->next_fd)
55556007caeSAl Viro files->next_fd = fd;
55656007caeSAl Viro }
55756007caeSAl Viro
put_unused_fd(unsigned int fd)55856007caeSAl Viro void put_unused_fd(unsigned int fd)
55956007caeSAl Viro {
56056007caeSAl Viro struct files_struct *files = current->files;
56156007caeSAl Viro spin_lock(&files->file_lock);
56256007caeSAl Viro __put_unused_fd(files, fd);
56356007caeSAl Viro spin_unlock(&files->file_lock);
56456007caeSAl Viro }
56556007caeSAl Viro
56656007caeSAl Viro EXPORT_SYMBOL(put_unused_fd);
56756007caeSAl Viro
56856007caeSAl Viro /*
56956007caeSAl Viro * Install a file pointer in the fd array.
57056007caeSAl Viro *
57156007caeSAl Viro * The VFS is full of places where we drop the files lock between
57256007caeSAl Viro * setting the open_fds bitmap and installing the file in the file
57356007caeSAl Viro * array. At any such point, we are vulnerable to a dup2() race
57456007caeSAl Viro * installing a file in the array before us. We need to detect this and
57556007caeSAl Viro * fput() the struct file we are about to overwrite in this case.
57656007caeSAl Viro *
57756007caeSAl Viro * It should never happen - if we allow dup2() do it, _really_ bad things
57856007caeSAl Viro * will follow.
579f869e8a7SAl Viro *
580d74ba04dSEric W. Biederman * This consumes the "file" refcount, so callers should treat it
581d74ba04dSEric W. Biederman * as if they had called fput(file).
58256007caeSAl Viro */
58356007caeSAl Viro
fd_install(unsigned int fd,struct file * file)584d74ba04dSEric W. Biederman void fd_install(unsigned int fd, struct file *file)
58556007caeSAl Viro {
586d74ba04dSEric W. Biederman struct files_struct *files = current->files;
58756007caeSAl Viro struct fdtable *fdt;
5888a81252bSEric Dumazet
5898a81252bSEric Dumazet rcu_read_lock_sched();
5908a81252bSEric Dumazet
591c02b1a9bSMateusz Guzik if (unlikely(files->resize_in_progress)) {
5928a81252bSEric Dumazet rcu_read_unlock_sched();
593c02b1a9bSMateusz Guzik spin_lock(&files->file_lock);
594c02b1a9bSMateusz Guzik fdt = files_fdtable(files);
595c02b1a9bSMateusz Guzik BUG_ON(fdt->fd[fd] != NULL);
596c02b1a9bSMateusz Guzik rcu_assign_pointer(fdt->fd[fd], file);
597c02b1a9bSMateusz Guzik spin_unlock(&files->file_lock);
598c02b1a9bSMateusz Guzik return;
5998a81252bSEric Dumazet }
6008a81252bSEric Dumazet /* coupled with smp_wmb() in expand_fdtable() */
6018a81252bSEric Dumazet smp_rmb();
6028a81252bSEric Dumazet fdt = rcu_dereference_sched(files->fdt);
60356007caeSAl Viro BUG_ON(fdt->fd[fd] != NULL);
60456007caeSAl Viro rcu_assign_pointer(fdt->fd[fd], file);
6058a81252bSEric Dumazet rcu_read_unlock_sched();
60656007caeSAl Viro }
60756007caeSAl Viro
60856007caeSAl Viro EXPORT_SYMBOL(fd_install);
6090ee8cdfeSAl Viro
610f49fd6d3SChristian Brauner /**
611f49fd6d3SChristian Brauner * pick_file - return file associatd with fd
612f49fd6d3SChristian Brauner * @files: file struct to retrieve file from
613f49fd6d3SChristian Brauner * @fd: file descriptor to retrieve file for
614f49fd6d3SChristian Brauner *
6156319194eSAl Viro * Context: files_lock must be held.
616f49fd6d3SChristian Brauner *
6176319194eSAl Viro * Returns: The file associated with @fd (NULL if @fd is not open)
618f49fd6d3SChristian Brauner */
pick_file(struct files_struct * files,unsigned fd)619278a5fbaSChristian Brauner static struct file *pick_file(struct files_struct *files, unsigned fd)
620483ce1d4SAl Viro {
6216319194eSAl Viro struct fdtable *fdt = files_fdtable(files);
622f49fd6d3SChristian Brauner struct file *file;
623483ce1d4SAl Viro
6246319194eSAl Viro if (fd >= fdt->max_fds)
6256319194eSAl Viro return NULL;
6266319194eSAl Viro
627609d5444STheodore Ts'o fd = array_index_nospec(fd, fdt->max_fds);
628483ce1d4SAl Viro file = fdt->fd[fd];
6296319194eSAl Viro if (file) {
630483ce1d4SAl Viro rcu_assign_pointer(fdt->fd[fd], NULL);
631483ce1d4SAl Viro __put_unused_fd(files, fd);
6326319194eSAl Viro }
633278a5fbaSChristian Brauner return file;
634278a5fbaSChristian Brauner }
635278a5fbaSChristian Brauner
close_fd(unsigned fd)6368760c909SEric W. Biederman int close_fd(unsigned fd)
637278a5fbaSChristian Brauner {
6388760c909SEric W. Biederman struct files_struct *files = current->files;
639278a5fbaSChristian Brauner struct file *file;
640278a5fbaSChristian Brauner
6416319194eSAl Viro spin_lock(&files->file_lock);
642278a5fbaSChristian Brauner file = pick_file(files, fd);
6436319194eSAl Viro spin_unlock(&files->file_lock);
6446319194eSAl Viro if (!file)
645483ce1d4SAl Viro return -EBADF;
646278a5fbaSChristian Brauner
647278a5fbaSChristian Brauner return filp_close(file, files);
648483ce1d4SAl Viro }
6498760c909SEric W. Biederman EXPORT_SYMBOL(close_fd); /* for ksys_close() */
650483ce1d4SAl Viro
6519b5b8722SChristian Brauner /**
6529b5b8722SChristian Brauner * last_fd - return last valid index into fd table
65335931eb3SMatthew Wilcox (Oracle) * @fdt: File descriptor table.
6549b5b8722SChristian Brauner *
6559b5b8722SChristian Brauner * Context: Either rcu read lock or files_lock must be held.
6569b5b8722SChristian Brauner *
6579b5b8722SChristian Brauner * Returns: Last valid index into fdtable.
6589b5b8722SChristian Brauner */
last_fd(struct fdtable * fdt)6599b5b8722SChristian Brauner static inline unsigned last_fd(struct fdtable *fdt)
6609b5b8722SChristian Brauner {
6619b5b8722SChristian Brauner return fdt->max_fds - 1;
6629b5b8722SChristian Brauner }
6639b5b8722SChristian Brauner
__range_cloexec(struct files_struct * cur_fds,unsigned int fd,unsigned int max_fd)664582f1fb6SGiuseppe Scrivano static inline void __range_cloexec(struct files_struct *cur_fds,
665582f1fb6SGiuseppe Scrivano unsigned int fd, unsigned int max_fd)
666582f1fb6SGiuseppe Scrivano {
667582f1fb6SGiuseppe Scrivano struct fdtable *fdt;
668582f1fb6SGiuseppe Scrivano
6699b5b8722SChristian Brauner /* make sure we're using the correct maximum value */
670582f1fb6SGiuseppe Scrivano spin_lock(&cur_fds->file_lock);
671582f1fb6SGiuseppe Scrivano fdt = files_fdtable(cur_fds);
6729b5b8722SChristian Brauner max_fd = min(last_fd(fdt), max_fd);
6739b5b8722SChristian Brauner if (fd <= max_fd)
674582f1fb6SGiuseppe Scrivano bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
675582f1fb6SGiuseppe Scrivano spin_unlock(&cur_fds->file_lock);
676582f1fb6SGiuseppe Scrivano }
677582f1fb6SGiuseppe Scrivano
__range_close(struct files_struct * files,unsigned int fd,unsigned int max_fd)678ed192c59SMateusz Guzik static inline void __range_close(struct files_struct *files, unsigned int fd,
679582f1fb6SGiuseppe Scrivano unsigned int max_fd)
680582f1fb6SGiuseppe Scrivano {
681ed192c59SMateusz Guzik struct file *file;
6826319194eSAl Viro unsigned n;
6836319194eSAl Viro
684ed192c59SMateusz Guzik spin_lock(&files->file_lock);
685ed192c59SMateusz Guzik n = last_fd(files_fdtable(files));
6866319194eSAl Viro max_fd = min(max_fd, n);
6876319194eSAl Viro
688ed192c59SMateusz Guzik for (; fd <= max_fd; fd++) {
689ed192c59SMateusz Guzik file = pick_file(files, fd);
6906319194eSAl Viro if (file) {
691ed192c59SMateusz Guzik spin_unlock(&files->file_lock);
692ed192c59SMateusz Guzik filp_close(file, files);
693582f1fb6SGiuseppe Scrivano cond_resched();
694ed192c59SMateusz Guzik spin_lock(&files->file_lock);
695ed192c59SMateusz Guzik } else if (need_resched()) {
696ed192c59SMateusz Guzik spin_unlock(&files->file_lock);
697ed192c59SMateusz Guzik cond_resched();
698ed192c59SMateusz Guzik spin_lock(&files->file_lock);
699f49fd6d3SChristian Brauner }
700582f1fb6SGiuseppe Scrivano }
701ed192c59SMateusz Guzik spin_unlock(&files->file_lock);
702582f1fb6SGiuseppe Scrivano }
703582f1fb6SGiuseppe Scrivano
704278a5fbaSChristian Brauner /**
705278a5fbaSChristian Brauner * __close_range() - Close all file descriptors in a given range.
706278a5fbaSChristian Brauner *
707278a5fbaSChristian Brauner * @fd: starting file descriptor to close
708278a5fbaSChristian Brauner * @max_fd: last file descriptor to close
70935931eb3SMatthew Wilcox (Oracle) * @flags: CLOSE_RANGE flags.
710278a5fbaSChristian Brauner *
711278a5fbaSChristian Brauner * This closes a range of file descriptors. All file descriptors
712278a5fbaSChristian Brauner * from @fd up to and including @max_fd are closed.
713278a5fbaSChristian Brauner */
__close_range(unsigned fd,unsigned max_fd,unsigned int flags)71460997c3dSChristian Brauner int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
715278a5fbaSChristian Brauner {
71660997c3dSChristian Brauner struct task_struct *me = current;
71760997c3dSChristian Brauner struct files_struct *cur_fds = me->files, *fds = NULL;
71860997c3dSChristian Brauner
719582f1fb6SGiuseppe Scrivano if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC))
72060997c3dSChristian Brauner return -EINVAL;
721278a5fbaSChristian Brauner
722278a5fbaSChristian Brauner if (fd > max_fd)
723278a5fbaSChristian Brauner return -EINVAL;
724278a5fbaSChristian Brauner
725a8023f8bSAl Viro if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) {
726a8023f8bSAl Viro struct fd_range range = {fd, max_fd}, *punch_hole = ⦥
72760997c3dSChristian Brauner
72860997c3dSChristian Brauner /*
72903ba0fe4SChristian Brauner * If the caller requested all fds to be made cloexec we always
73003ba0fe4SChristian Brauner * copy all of the file descriptors since they still want to
73103ba0fe4SChristian Brauner * use them.
73260997c3dSChristian Brauner */
733a8023f8bSAl Viro if (flags & CLOSE_RANGE_CLOEXEC)
734a8023f8bSAl Viro punch_hole = NULL;
73560997c3dSChristian Brauner
736a8023f8bSAl Viro fds = dup_fd(cur_fds, punch_hole);
737a8023f8bSAl Viro if (IS_ERR(fds))
738a8023f8bSAl Viro return PTR_ERR(fds);
73960997c3dSChristian Brauner /*
74060997c3dSChristian Brauner * We used to share our file descriptor table, and have now
74160997c3dSChristian Brauner * created a private one, make sure we're using it below.
74260997c3dSChristian Brauner */
74360997c3dSChristian Brauner swap(cur_fds, fds);
74460997c3dSChristian Brauner }
74560997c3dSChristian Brauner
746582f1fb6SGiuseppe Scrivano if (flags & CLOSE_RANGE_CLOEXEC)
747582f1fb6SGiuseppe Scrivano __range_cloexec(cur_fds, fd, max_fd);
748582f1fb6SGiuseppe Scrivano else
749582f1fb6SGiuseppe Scrivano __range_close(cur_fds, fd, max_fd);
750278a5fbaSChristian Brauner
75160997c3dSChristian Brauner if (fds) {
75260997c3dSChristian Brauner /*
75360997c3dSChristian Brauner * We're done closing the files we were supposed to. Time to install
75460997c3dSChristian Brauner * the new file descriptor table and drop the old one.
75560997c3dSChristian Brauner */
75660997c3dSChristian Brauner task_lock(me);
75760997c3dSChristian Brauner me->files = cur_fds;
75860997c3dSChristian Brauner task_unlock(me);
75960997c3dSChristian Brauner put_files_struct(fds);
76060997c3dSChristian Brauner }
76160997c3dSChristian Brauner
762278a5fbaSChristian Brauner return 0;
763278a5fbaSChristian Brauner }
764278a5fbaSChristian Brauner
76580cd7956STodd Kjos /*
76653dec2eaSJens Axboe * See close_fd_get_file() below, this variant assumes current->files->file_lock
76753dec2eaSJens Axboe * is held.
76853dec2eaSJens Axboe */
__close_fd_get_file(unsigned int fd)7696319194eSAl Viro struct file *__close_fd_get_file(unsigned int fd)
77053dec2eaSJens Axboe {
7716319194eSAl Viro return pick_file(current->files, fd);
77253dec2eaSJens Axboe }
77353dec2eaSJens Axboe
77453dec2eaSJens Axboe /*
7759fe83c43SEric W. Biederman * variant of close_fd that gets a ref on the file for later fput.
77640a19260SAl Viro * The caller must ensure that filp_close() called on the file.
77780cd7956STodd Kjos */
close_fd_get_file(unsigned int fd)7786319194eSAl Viro struct file *close_fd_get_file(unsigned int fd)
77980cd7956STodd Kjos {
78080cd7956STodd Kjos struct files_struct *files = current->files;
7816319194eSAl Viro struct file *file;
78280cd7956STodd Kjos
78380cd7956STodd Kjos spin_lock(&files->file_lock);
7846319194eSAl Viro file = pick_file(files, fd);
78580cd7956STodd Kjos spin_unlock(&files->file_lock);
78680cd7956STodd Kjos
7876319194eSAl Viro return file;
78880cd7956STodd Kjos }
78980cd7956STodd Kjos
do_close_on_exec(struct files_struct * files)7906a6d27deSAl Viro void do_close_on_exec(struct files_struct *files)
7916a6d27deSAl Viro {
7926a6d27deSAl Viro unsigned i;
7936a6d27deSAl Viro struct fdtable *fdt;
7946a6d27deSAl Viro
7956a6d27deSAl Viro /* exec unshares first */
7966a6d27deSAl Viro spin_lock(&files->file_lock);
7976a6d27deSAl Viro for (i = 0; ; i++) {
7986a6d27deSAl Viro unsigned long set;
7996a6d27deSAl Viro unsigned fd = i * BITS_PER_LONG;
8006a6d27deSAl Viro fdt = files_fdtable(files);
8016a6d27deSAl Viro if (fd >= fdt->max_fds)
8026a6d27deSAl Viro break;
8036a6d27deSAl Viro set = fdt->close_on_exec[i];
8046a6d27deSAl Viro if (!set)
8056a6d27deSAl Viro continue;
8066a6d27deSAl Viro fdt->close_on_exec[i] = 0;
8076a6d27deSAl Viro for ( ; set ; fd++, set >>= 1) {
8086a6d27deSAl Viro struct file *file;
8096a6d27deSAl Viro if (!(set & 1))
8106a6d27deSAl Viro continue;
8116a6d27deSAl Viro file = fdt->fd[fd];
8126a6d27deSAl Viro if (!file)
8136a6d27deSAl Viro continue;
8146a6d27deSAl Viro rcu_assign_pointer(fdt->fd[fd], NULL);
8156a6d27deSAl Viro __put_unused_fd(files, fd);
8166a6d27deSAl Viro spin_unlock(&files->file_lock);
8176a6d27deSAl Viro filp_close(file, files);
8186a6d27deSAl Viro cond_resched();
8196a6d27deSAl Viro spin_lock(&files->file_lock);
8206a6d27deSAl Viro }
8216a6d27deSAl Viro
8226a6d27deSAl Viro }
8236a6d27deSAl Viro spin_unlock(&files->file_lock);
8246a6d27deSAl Viro }
8256a6d27deSAl Viro
/*
 * Look up @fd in @files without taking file_lock and grab a reference to
 * the file found there.
 *
 * Returns NULL when @fd is outside the table, when the slot is empty, or
 * when any bit of @mask is set in the file's f_mode.  Otherwise the lookup
 * is repeated until a reference was obtained on a file that is still the
 * one installed at @fd.
 */
static inline struct file *__fget_files_rcu(struct files_struct *files,
	unsigned int fd, fmode_t mask)
{
	for (;;) {
		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
		struct file __rcu **slot;
		struct file *file;

		if (unlikely(fd >= fdt->max_fds))
			return NULL;

		slot = &fdt->fd[array_index_nospec(fd, fdt->max_fds)];
		file = rcu_dereference_raw(*slot);
		if (unlikely(!file))
			return NULL;

		if (unlikely(file->f_mode & mask))
			return NULL;

		/*
		 * We now hold a file pointer, but since everything here is
		 * done locklessly under RCU, the file may be getting closed
		 * at this very moment.
		 *
		 * That race shows up in one of two ways:
		 *
		 *  (a) the file ref already went down to zero,
		 *      and get_file_rcu() fails. Just try again:
		 */
		if (unlikely(!get_file_rcu(file)))
			continue;

		/*
		 *  (b) the file table entry has changed under us.
		 *       There is no need to re-check the 'fdt->fd'
		 *       pointer itself, because it always goes
		 *       hand-in-hand with 'fdt'.
		 *
		 * If neither changed, we have a ref to the file and checked
		 * that it still exists.
		 */
		if (likely(rcu_dereference_raw(files->fdt) == fdt) &&
		    likely(rcu_dereference_raw(*slot) == file))
			return file;

		/* Stale entry: put our ref and try again. */
		fput(file);
	}
}
879e386dfc5SLinus Torvalds
/*
 * Wrap __fget_files_rcu() in an RCU read-side critical section.
 * Returns whatever that lookup produced (a referenced file or NULL).
 */
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
				 fmode_t mask)
{
	struct file *found;

	rcu_read_lock();
	found = __fget_files_rcu(files, fd, mask);
	rcu_read_unlock();

	return found;
}
8910ee8cdfeSAl Viro
/* __fget_files() applied to the current task's file table. */
static inline struct file *__fget(unsigned int fd, fmode_t mask)
{
	struct files_struct *files = current->files;

	return __fget_files(files, fd, mask);
}
896091141a4SJens Axboe
fget(unsigned int fd)8971deb46e2SOleg Nesterov struct file *fget(unsigned int fd)
8981deb46e2SOleg Nesterov {
89981132a39SGou Hao return __fget(fd, FMODE_PATH);
9001deb46e2SOleg Nesterov }
9010ee8cdfeSAl Viro EXPORT_SYMBOL(fget);
9020ee8cdfeSAl Viro
/*
 * Same lookup as fget(), but with an empty reject mask: no f_mode bit
 * causes the file to be refused.
 */
struct file *fget_raw(unsigned int fd)
{
	struct file *file = __fget(fd, 0);

	return file;
}
EXPORT_SYMBOL(fget_raw);
9080ee8cdfeSAl Viro
fget_task(struct task_struct * task,unsigned int fd)9095e876fb4SSargun Dhillon struct file *fget_task(struct task_struct *task, unsigned int fd)
9105e876fb4SSargun Dhillon {
9115e876fb4SSargun Dhillon struct file *file = NULL;
9125e876fb4SSargun Dhillon
9135e876fb4SSargun Dhillon task_lock(task);
9145e876fb4SSargun Dhillon if (task->files)
91581132a39SGou Hao file = __fget_files(task->files, fd, 0);
9165e876fb4SSargun Dhillon task_unlock(task);
9175e876fb4SSargun Dhillon
9185e876fb4SSargun Dhillon return file;
9195e876fb4SSargun Dhillon }
9205e876fb4SSargun Dhillon
task_lookup_fd_rcu(struct task_struct * task,unsigned int fd)9213a879fb3SEric W. Biederman struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
9223a879fb3SEric W. Biederman {
9233a879fb3SEric W. Biederman /* Must be called with rcu_read_lock held */
9243a879fb3SEric W. Biederman struct files_struct *files;
9253a879fb3SEric W. Biederman struct file *file = NULL;
9263a879fb3SEric W. Biederman
9273a879fb3SEric W. Biederman task_lock(task);
9283a879fb3SEric W. Biederman files = task->files;
9293a879fb3SEric W. Biederman if (files)
9303a879fb3SEric W. Biederman file = files_lookup_fd_rcu(files, fd);
9313a879fb3SEric W. Biederman task_unlock(task);
9323a879fb3SEric W. Biederman
9333a879fb3SEric W. Biederman return file;
9343a879fb3SEric W. Biederman }
9353a879fb3SEric W. Biederman
task_lookup_next_fd_rcu(struct task_struct * task,unsigned int * ret_fd)936e9a53aebSEric W. Biederman struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd)
937e9a53aebSEric W. Biederman {
938e9a53aebSEric W. Biederman /* Must be called with rcu_read_lock held */
939e9a53aebSEric W. Biederman struct files_struct *files;
940e9a53aebSEric W. Biederman unsigned int fd = *ret_fd;
941e9a53aebSEric W. Biederman struct file *file = NULL;
942e9a53aebSEric W. Biederman
943e9a53aebSEric W. Biederman task_lock(task);
944e9a53aebSEric W. Biederman files = task->files;
945e9a53aebSEric W. Biederman if (files) {
946e9a53aebSEric W. Biederman for (; fd < files_fdtable(files)->max_fds; fd++) {
947e9a53aebSEric W. Biederman file = files_lookup_fd_rcu(files, fd);
948e9a53aebSEric W. Biederman if (file)
949e9a53aebSEric W. Biederman break;
950e9a53aebSEric W. Biederman }
951e9a53aebSEric W. Biederman }
952e9a53aebSEric W. Biederman task_unlock(task);
953e9a53aebSEric W. Biederman *ret_fd = fd;
954e9a53aebSEric W. Biederman return file;
955e9a53aebSEric W. Biederman }
9564480c27cSAndreas Gruenbacher EXPORT_SYMBOL(task_lookup_next_fd_rcu);
957e9a53aebSEric W. Biederman
9580ee8cdfeSAl Viro /*
9590ee8cdfeSAl Viro * Lightweight file lookup - no refcnt increment if fd table isn't shared.
9600ee8cdfeSAl Viro *
9610ee8cdfeSAl Viro * You can use this instead of fget if you satisfy all of the following
9620ee8cdfeSAl Viro * conditions:
9630ee8cdfeSAl Viro * 1) You must call fput_light before exiting the syscall and returning control
9640ee8cdfeSAl Viro * to userspace (i.e. you cannot remember the returned struct file * after
9650ee8cdfeSAl Viro * returning to userspace).
9660ee8cdfeSAl Viro * 2) You must not call filp_close on the returned struct file * in between
9670ee8cdfeSAl Viro * calls to fget_light and fput_light.
9680ee8cdfeSAl Viro * 3) You must not clone the current task in between the calls to fget_light
9690ee8cdfeSAl Viro * and fput_light.
9700ee8cdfeSAl Viro *
9710ee8cdfeSAl Viro * The fput_needed flag returned by fget_light should be passed to the
9720ee8cdfeSAl Viro * corresponding fput_light.
9730ee8cdfeSAl Viro */
__fget_light(unsigned int fd,fmode_t mask)974bd2a31d5SAl Viro static unsigned long __fget_light(unsigned int fd, fmode_t mask)
9750ee8cdfeSAl Viro {
9760ee8cdfeSAl Viro struct files_struct *files = current->files;
977ad461834SOleg Nesterov struct file *file;
9780ee8cdfeSAl Viro
9797ee47dcfSJann Horn /*
9807ee47dcfSJann Horn * If another thread is concurrently calling close_fd() followed
9817ee47dcfSJann Horn * by put_files_struct(), we must not observe the old table
9827ee47dcfSJann Horn * entry combined with the new refcount - otherwise we could
9837ee47dcfSJann Horn * return a file that is concurrently being freed.
9847ee47dcfSJann Horn *
9857ee47dcfSJann Horn * atomic_read_acquire() pairs with atomic_dec_and_test() in
9867ee47dcfSJann Horn * put_files_struct().
9877ee47dcfSJann Horn */
9887ee47dcfSJann Horn if (atomic_read_acquire(&files->count) == 1) {
989bebf684bSEric W. Biederman file = files_lookup_fd_raw(files, fd);
990bd2a31d5SAl Viro if (!file || unlikely(file->f_mode & mask))
991bd2a31d5SAl Viro return 0;
992bd2a31d5SAl Viro return (unsigned long)file;
9930ee8cdfeSAl Viro } else {
99481132a39SGou Hao file = __fget(fd, mask);
995bd2a31d5SAl Viro if (!file)
996bd2a31d5SAl Viro return 0;
997bd2a31d5SAl Viro return FDPUT_FPUT | (unsigned long)file;
998bd2a31d5SAl Viro }
999bd2a31d5SAl Viro }
__fdget(unsigned int fd)1000bd2a31d5SAl Viro unsigned long __fdget(unsigned int fd)
1001bd2a31d5SAl Viro {
1002bd2a31d5SAl Viro return __fget_light(fd, FMODE_PATH);
1003bd2a31d5SAl Viro }
1004bd2a31d5SAl Viro EXPORT_SYMBOL(__fdget);
1005bd2a31d5SAl Viro
/* __fget_light() with an empty reject mask. */
unsigned long __fdget_raw(unsigned int fd)
{
	unsigned long v = __fget_light(fd, 0);

	return v;
}
10100ee8cdfeSAl Viro
101179796425SLinus Torvalds /*
101279796425SLinus Torvalds * Try to avoid f_pos locking. We only need it if the
101379796425SLinus Torvalds * file is marked for FMODE_ATOMIC_POS, and it can be
101479796425SLinus Torvalds * accessed multiple ways.
101579796425SLinus Torvalds *
101679796425SLinus Torvalds * Always do it for directories, because pidfd_getfd()
101779796425SLinus Torvalds * can make a file accessible even if it otherwise would
101879796425SLinus Torvalds * not be, and for directories this is a correctness
101979796425SLinus Torvalds * issue, not a "POSIX requirement".
102079796425SLinus Torvalds */
file_needs_f_pos_lock(struct file * file)102179796425SLinus Torvalds static inline bool file_needs_f_pos_lock(struct file *file)
102279796425SLinus Torvalds {
102379796425SLinus Torvalds return (file->f_mode & FMODE_ATOMIC_POS) &&
10247d84d1b9SChristian Brauner (file_count(file) > 1 || file->f_op->iterate_shared);
102579796425SLinus Torvalds }
102679796425SLinus Torvalds
__fdget_pos(unsigned int fd)1027bd2a31d5SAl Viro unsigned long __fdget_pos(unsigned int fd)
1028ad461834SOleg Nesterov {
102999aea681SEric Biggers unsigned long v = __fdget(fd);
103099aea681SEric Biggers struct file *file = (struct file *)(v & ~3);
10310ee8cdfeSAl Viro
103279796425SLinus Torvalds if (file && file_needs_f_pos_lock(file)) {
1033bd2a31d5SAl Viro v |= FDPUT_POS_UNLOCK;
1034bd2a31d5SAl Viro mutex_lock(&file->f_pos_lock);
1035bd2a31d5SAl Viro }
103699aea681SEric Biggers return v;
1037bd2a31d5SAl Viro }
1038bd2a31d5SAl Viro
__f_unlock_pos(struct file * f)103963b6df14SAl Viro void __f_unlock_pos(struct file *f)
104063b6df14SAl Viro {
104163b6df14SAl Viro mutex_unlock(&f->f_pos_lock);
104263b6df14SAl Viro }
104363b6df14SAl Viro
1044bd2a31d5SAl Viro /*
1045bd2a31d5SAl Viro * We only lock f_pos if we have threads or if the file might be
1046bd2a31d5SAl Viro * shared with another process. In both cases we'll have an elevated
1047bd2a31d5SAl Viro * file count (done either by fdget() or by fork()).
1048bd2a31d5SAl Viro */
1049fe17f22dSAl Viro
set_close_on_exec(unsigned int fd,int flag)1050fe17f22dSAl Viro void set_close_on_exec(unsigned int fd, int flag)
1051fe17f22dSAl Viro {
1052fe17f22dSAl Viro struct files_struct *files = current->files;
1053fe17f22dSAl Viro struct fdtable *fdt;
1054fe17f22dSAl Viro spin_lock(&files->file_lock);
1055fe17f22dSAl Viro fdt = files_fdtable(files);
1056fe17f22dSAl Viro if (flag)
1057fe17f22dSAl Viro __set_close_on_exec(fd, fdt);
1058fe17f22dSAl Viro else
1059fe17f22dSAl Viro __clear_close_on_exec(fd, fdt);
1060fe17f22dSAl Viro spin_unlock(&files->file_lock);
1061fe17f22dSAl Viro }
1062fe17f22dSAl Viro
get_close_on_exec(unsigned int fd)1063fe17f22dSAl Viro bool get_close_on_exec(unsigned int fd)
1064fe17f22dSAl Viro {
1065fe17f22dSAl Viro struct files_struct *files = current->files;
1066fe17f22dSAl Viro struct fdtable *fdt;
1067fe17f22dSAl Viro bool res;
1068fe17f22dSAl Viro rcu_read_lock();
1069fe17f22dSAl Viro fdt = files_fdtable(files);
1070fe17f22dSAl Viro res = close_on_exec(fd, fdt);
1071fe17f22dSAl Viro rcu_read_unlock();
1072fe17f22dSAl Viro return res;
1073fe17f22dSAl Viro }
1074fe17f22dSAl Viro
do_dup2(struct files_struct * files,struct file * file,unsigned fd,unsigned flags)10758280d161SAl Viro static int do_dup2(struct files_struct *files,
10768280d161SAl Viro struct file *file, unsigned fd, unsigned flags)
1077e983094dSAl Viro __releases(&files->file_lock)
10788280d161SAl Viro {
10798280d161SAl Viro struct file *tofree;
10808280d161SAl Viro struct fdtable *fdt;
10818280d161SAl Viro
10828280d161SAl Viro /*
10838280d161SAl Viro * We need to detect attempts to do dup2() over allocated but still
10848280d161SAl Viro * not finished descriptor. NB: OpenBSD avoids that at the price of
10858280d161SAl Viro * extra work in their equivalent of fget() - they insert struct
10868280d161SAl Viro * file immediately after grabbing descriptor, mark it larval if
10878280d161SAl Viro * more work (e.g. actual opening) is needed and make sure that
10888280d161SAl Viro * fget() treats larval files as absent. Potentially interesting,
10898280d161SAl Viro * but while extra work in fget() is trivial, locking implications
10908280d161SAl Viro * and amount of surgery on open()-related paths in VFS are not.
10918280d161SAl Viro * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
10928280d161SAl Viro * deadlocks in rather amusing ways, AFAICS. All of that is out of
10938280d161SAl Viro * scope of POSIX or SUS, since neither considers shared descriptor
10948280d161SAl Viro * tables and this condition does not arise without those.
10958280d161SAl Viro */
10968280d161SAl Viro fdt = files_fdtable(files);
1097da72e783SAl Viro fd = array_index_nospec(fd, fdt->max_fds);
10988280d161SAl Viro tofree = fdt->fd[fd];
10998280d161SAl Viro if (!tofree && fd_is_open(fd, fdt))
11008280d161SAl Viro goto Ebusy;
11018280d161SAl Viro get_file(file);
11028280d161SAl Viro rcu_assign_pointer(fdt->fd[fd], file);
11038280d161SAl Viro __set_open_fd(fd, fdt);
11048280d161SAl Viro if (flags & O_CLOEXEC)
11058280d161SAl Viro __set_close_on_exec(fd, fdt);
11068280d161SAl Viro else
11078280d161SAl Viro __clear_close_on_exec(fd, fdt);
11088280d161SAl Viro spin_unlock(&files->file_lock);
11098280d161SAl Viro
11108280d161SAl Viro if (tofree)
11118280d161SAl Viro filp_close(tofree, files);
11128280d161SAl Viro
11138280d161SAl Viro return fd;
11148280d161SAl Viro
11158280d161SAl Viro Ebusy:
11168280d161SAl Viro spin_unlock(&files->file_lock);
11178280d161SAl Viro return -EBUSY;
11188280d161SAl Viro }
11198280d161SAl Viro
replace_fd(unsigned fd,struct file * file,unsigned flags)11208280d161SAl Viro int replace_fd(unsigned fd, struct file *file, unsigned flags)
11218280d161SAl Viro {
11228280d161SAl Viro int err;
11238280d161SAl Viro struct files_struct *files = current->files;
11248280d161SAl Viro
11258280d161SAl Viro if (!file)
11268760c909SEric W. Biederman return close_fd(fd);
11278280d161SAl Viro
11288280d161SAl Viro if (fd >= rlimit(RLIMIT_NOFILE))
112908f05c49SAl Viro return -EBADF;
11308280d161SAl Viro
11318280d161SAl Viro spin_lock(&files->file_lock);
11328280d161SAl Viro err = expand_files(files, fd);
11338280d161SAl Viro if (unlikely(err < 0))
11348280d161SAl Viro goto out_unlock;
11358280d161SAl Viro return do_dup2(files, file, fd, flags);
11368280d161SAl Viro
11378280d161SAl Viro out_unlock:
11388280d161SAl Viro spin_unlock(&files->file_lock);
11398280d161SAl Viro return err;
11408280d161SAl Viro }
11418280d161SAl Viro
114266590610SKees Cook /**
114366590610SKees Cook * __receive_fd() - Install received file into file descriptor table
114466590610SKees Cook * @file: struct file that was received from another process
114566590610SKees Cook * @ufd: __user pointer to write new fd number to
114666590610SKees Cook * @o_flags: the O_* flags to apply to the new fd entry
114766590610SKees Cook *
114866590610SKees Cook * Installs a received file into the file descriptor table, with appropriate
1149deefa7f3SKees Cook * checks and count updates. Optionally writes the fd number to userspace, if
1150deefa7f3SKees Cook * @ufd is non-NULL.
115166590610SKees Cook *
115266590610SKees Cook * This helper handles its own reference counting of the incoming
115366590610SKees Cook * struct file.
115466590610SKees Cook *
1155deefa7f3SKees Cook * Returns newly install fd or -ve on error.
115666590610SKees Cook */
__receive_fd(struct file * file,int __user * ufd,unsigned int o_flags)115742eb0d54SChristoph Hellwig int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags)
115866590610SKees Cook {
115966590610SKees Cook int new_fd;
116066590610SKees Cook int error;
116166590610SKees Cook
116266590610SKees Cook error = security_file_receive(file);
116366590610SKees Cook if (error)
116466590610SKees Cook return error;
116566590610SKees Cook
116666590610SKees Cook new_fd = get_unused_fd_flags(o_flags);
116766590610SKees Cook if (new_fd < 0)
116866590610SKees Cook return new_fd;
116966590610SKees Cook
1170deefa7f3SKees Cook if (ufd) {
117166590610SKees Cook error = put_user(new_fd, ufd);
117266590610SKees Cook if (error) {
117366590610SKees Cook put_unused_fd(new_fd);
117466590610SKees Cook return error;
117566590610SKees Cook }
1176deefa7f3SKees Cook }
117766590610SKees Cook
117817381715SKees Cook fd_install(new_fd, get_file(file));
117942eb0d54SChristoph Hellwig __receive_sock(file);
118042eb0d54SChristoph Hellwig return new_fd;
118142eb0d54SChristoph Hellwig }
118242eb0d54SChristoph Hellwig
/**
 * receive_fd_replace() - Install received file at a given descriptor
 * @new_fd: descriptor number the file is to be installed at
 * @file: struct file that was received from another process
 * @o_flags: the O_* flags to apply to the fd entry
 *
 * Runs security_file_receive() on @file, installs it at @new_fd through
 * replace_fd() and then calls __receive_sock() on it.
 *
 * Returns @new_fd on success or -ve on error.
 */
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags)
{
	int error;

	error = security_file_receive(file);
	if (error)
		return error;

	/*
	 * replace_fd() passes on do_dup2()'s result, which is the descriptor
	 * number itself on success.  Only negative values are failures;
	 * treating any non-zero value as one would skip __receive_sock()
	 * for every descriptor other than 0.
	 */
	error = replace_fd(new_fd, file, o_flags);
	if (error < 0)
		return error;

	__receive_sock(file);
	return new_fd;
}
119666590610SKees Cook
receive_fd(struct file * file,unsigned int o_flags)11979c930054SXie Yongji int receive_fd(struct file *file, unsigned int o_flags)
11989c930054SXie Yongji {
11999c930054SXie Yongji return __receive_fd(file, NULL, o_flags);
12009c930054SXie Yongji }
12019c930054SXie Yongji EXPORT_SYMBOL_GPL(receive_fd);
12029c930054SXie Yongji
ksys_dup3(unsigned int oldfd,unsigned int newfd,int flags)1203c7248321SDominik Brodowski static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
1204fe17f22dSAl Viro {
1205fe17f22dSAl Viro int err = -EBADF;
12068280d161SAl Viro struct file *file;
1207fe17f22dSAl Viro struct files_struct *files = current->files;
1208fe17f22dSAl Viro
1209fe17f22dSAl Viro if ((flags & ~O_CLOEXEC) != 0)
1210fe17f22dSAl Viro return -EINVAL;
1211fe17f22dSAl Viro
1212aed97647SRichard W.M. Jones if (unlikely(oldfd == newfd))
1213aed97647SRichard W.M. Jones return -EINVAL;
1214aed97647SRichard W.M. Jones
1215fe17f22dSAl Viro if (newfd >= rlimit(RLIMIT_NOFILE))
121608f05c49SAl Viro return -EBADF;
1217fe17f22dSAl Viro
1218fe17f22dSAl Viro spin_lock(&files->file_lock);
1219fe17f22dSAl Viro err = expand_files(files, newfd);
1220120ce2b0SEric W. Biederman file = files_lookup_fd_locked(files, oldfd);
1221fe17f22dSAl Viro if (unlikely(!file))
1222fe17f22dSAl Viro goto Ebadf;
1223fe17f22dSAl Viro if (unlikely(err < 0)) {
1224fe17f22dSAl Viro if (err == -EMFILE)
1225fe17f22dSAl Viro goto Ebadf;
1226fe17f22dSAl Viro goto out_unlock;
1227fe17f22dSAl Viro }
12288280d161SAl Viro return do_dup2(files, file, newfd, flags);
1229fe17f22dSAl Viro
1230fe17f22dSAl Viro Ebadf:
1231fe17f22dSAl Viro err = -EBADF;
1232fe17f22dSAl Viro out_unlock:
1233fe17f22dSAl Viro spin_unlock(&files->file_lock);
1234fe17f22dSAl Viro return err;
1235fe17f22dSAl Viro }
1236fe17f22dSAl Viro
/* dup3() system call: all the work is done by ksys_dup3(). */
SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
{
	int ret = ksys_dup3(oldfd, newfd, flags);

	return ret;
}
1241c7248321SDominik Brodowski
/*
 * dup2() system call.
 *
 * Unlike dup3(), @newfd == @oldfd is accepted: the call then returns
 * @oldfd if it is open and -EBADF otherwise.  Every other case is
 * ksys_dup3() with no flags.
 */
SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
{
	struct files_struct *files;
	int retval;

	if (likely(newfd != oldfd))
		return ksys_dup3(oldfd, newfd, 0);

	/* corner case */
	files = current->files;
	retval = oldfd;

	rcu_read_lock();
	if (!files_lookup_fd_rcu(files, oldfd))
		retval = -EBADF;
	rcu_read_unlock();

	return retval;
}
1256fe17f22dSAl Viro
/*
 * dup() system call: install the file at @fildes under a newly allocated
 * descriptor.
 *
 * Returns the new descriptor, -EBADF when @fildes is not open, or the
 * error from get_unused_fd_flags(); in the last case the reference taken
 * by fget_raw() is dropped again.
 */
SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
	struct file *file = fget_raw(fildes);
	int ret;

	if (!file)
		return -EBADF;

	ret = get_unused_fd_flags(0);
	if (ret < 0) {
		fput(file);
		return ret;
	}

	/* the reference obtained above now belongs to the table */
	fd_install(ret, file);
	return ret;
}
1271fe17f22dSAl Viro
f_dupfd(unsigned int from,struct file * file,unsigned flags)1272fe17f22dSAl Viro int f_dupfd(unsigned int from, struct file *file, unsigned flags)
1273fe17f22dSAl Viro {
1274e06b53c2SEric W. Biederman unsigned long nofile = rlimit(RLIMIT_NOFILE);
1275fe17f22dSAl Viro int err;
1276e06b53c2SEric W. Biederman if (from >= nofile)
1277fe17f22dSAl Viro return -EINVAL;
1278e06b53c2SEric W. Biederman err = alloc_fd(from, nofile, flags);
1279fe17f22dSAl Viro if (err >= 0) {
1280fe17f22dSAl Viro get_file(file);
1281fe17f22dSAl Viro fd_install(err, file);
1282fe17f22dSAl Viro }
1283fe17f22dSAl Viro return err;
1284fe17f22dSAl Viro }
1285c3c073f8SAl Viro
iterate_fd(struct files_struct * files,unsigned n,int (* f)(const void *,struct file *,unsigned),const void * p)1286c3c073f8SAl Viro int iterate_fd(struct files_struct *files, unsigned n,
1287c3c073f8SAl Viro int (*f)(const void *, struct file *, unsigned),
1288c3c073f8SAl Viro const void *p)
1289c3c073f8SAl Viro {
1290c3c073f8SAl Viro struct fdtable *fdt;
1291c3c073f8SAl Viro int res = 0;
1292c3c073f8SAl Viro if (!files)
1293c3c073f8SAl Viro return 0;
1294c3c073f8SAl Viro spin_lock(&files->file_lock);
1295a77cfcb4SAl Viro for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
1296a77cfcb4SAl Viro struct file *file;
1297a77cfcb4SAl Viro file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
1298a77cfcb4SAl Viro if (!file)
1299a77cfcb4SAl Viro continue;
1300c3c073f8SAl Viro res = f(p, file, n);
1301a77cfcb4SAl Viro if (res)
1302a77cfcb4SAl Viro break;
1303c3c073f8SAl Viro }
1304c3c073f8SAl Viro spin_unlock(&files->file_lock);
1305c3c073f8SAl Viro return res;
1306c3c073f8SAl Viro }
1307c3c073f8SAl Viro EXPORT_SYMBOL(iterate_fd);
1308