/*
 * linux/fs/file.c
 *
 * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
 *
 * Manage the dynamic fd arrays in the process files_struct.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

struct fdtable_defer {
        spinlock_t lock;
        struct work_struct wq;
        struct fdtable *next;
};

/*
 * We use this list to defer freeing of fdtables that have vmalloced
 * fd sets/arrays. By keeping a per-cpu list, we avoid having to embed
 * the work_struct in the fdtable itself, which would mean a 64 byte
 * (i386) increase in this persistent structure.
 */
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);

static inline void * alloc_fdmem(unsigned int size)
{
        if (size <= PAGE_SIZE)
                return kmalloc(size, GFP_KERNEL);
        else
                return vmalloc(size);
}

static inline void free_fdarr(struct fdtable *fdt)
{
        if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *)))
                kfree(fdt->fd);
        else
                vfree(fdt->fd);
}

static inline void free_fdset(struct fdtable *fdt)
{
        if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2))
                kfree(fdt->open_fds);
        else
                vfree(fdt->open_fds);
}

static void free_fdtable_work(struct work_struct *work)
{
        struct fdtable_defer *f =
                container_of(work, struct fdtable_defer, wq);
        struct fdtable *fdt;

        spin_lock_bh(&f->lock);
        fdt = f->next;
        f->next = NULL;
        spin_unlock_bh(&f->lock);
        while (fdt) {
                struct fdtable *next = fdt->next;
                /* only fdtables with a vmalloced fd array end up here */
                vfree(fdt->fd);
                free_fdset(fdt);
                kfree(fdt);
                fdt = next;
        }
}

void free_fdtable_rcu(struct rcu_head *rcu)
{
        struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
        struct fdtable_defer *fddef;

        BUG_ON(!fdt);

        if (fdt->max_fds <= NR_OPEN_DEFAULT) {
                /*
                 * This fdtable is embedded in the files structure and that
                 * structure itself is getting destroyed.
                 */
                kmem_cache_free(files_cachep,
                                container_of(fdt, struct files_struct, fdtab));
                return;
        }
        if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) {
                kfree(fdt->fd);
                kfree(fdt->open_fds);
                kfree(fdt);
        } else {
                fddef = &get_cpu_var(fdtable_defer_list);
                spin_lock(&fddef->lock);
                fdt->next = fddef->next;
                fddef->next = fdt;
                /* vmallocs are handled from the workqueue context */
                schedule_work(&fddef->wq);
                spin_unlock(&fddef->lock);
                put_cpu_var(fdtable_defer_list);
        }
}
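
/*
 * Illustrative sketch (not part of the original file): the lock-free
 * reader pattern that free_fdtable_rcu() above exists to protect. It
 * follows the shape of fcheck_files() in include/linux/file.h; the
 * function name here is made up for the example. Because readers only
 * hold rcu_read_lock(), a retired fdtable must not be freed until a
 * grace period has elapsed, which is why replacement goes through
 * call_rcu()/free_fdtable_rcu() rather than a direct kfree()/vfree().
 */
static inline struct file *example_lookup_fd(struct files_struct *files,
                                             unsigned int fd)
{
        struct file *file = NULL;
        struct fdtable *fdt;

        rcu_read_lock();
        fdt = files_fdtable(files);     /* rcu_dereference(files->fdt) */
        if (fd < fdt->max_fds)
                file = rcu_dereference(fdt->fd[fd]);
        rcu_read_unlock();
        return file;
}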

/*
 * Expand the fdset in the files_struct. Called with the files spinlock
 * held for write.
 */
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
{
        unsigned int cpy, set;

        BUG_ON(nfdt->max_fds < ofdt->max_fds);
        if (ofdt->max_fds == 0)
                return;

        cpy = ofdt->max_fds * sizeof(struct file *);
        set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
        memcpy(nfdt->fd, ofdt->fd, cpy);
        memset((char *)(nfdt->fd) + cpy, 0, set);

        cpy = ofdt->max_fds / BITS_PER_BYTE;
        set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
        memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
        memset((char *)(nfdt->open_fds) + cpy, 0, set);
        memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
        memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
}

static struct fdtable * alloc_fdtable(unsigned int nr)
{
        struct fdtable *fdt;
        char *data;

        /*
         * Figure out how many fds we actually want to support in this
         * fdtable. Allocation steps are keyed to the size of the fdarray,
         * since it grows far faster than any of the other dynamic data.
         * We try to fit the fdarray into comfortable page-tuned chunks:
         * starting at 1024B and growing in powers of two from there on.
         */
        nr /= (1024 / sizeof(struct file *));
        nr = roundup_pow_of_two(nr + 1);
        nr *= (1024 / sizeof(struct file *));
        if (nr > NR_OPEN)
                nr = NR_OPEN;

        fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
        if (!fdt)
                goto out;
        fdt->max_fds = nr;
        data = alloc_fdmem(nr * sizeof(struct file *));
        if (!data)
                goto out_fdt;
        fdt->fd = (struct file **)data;
        /* one allocation covers both bitmaps: open_fds and close_on_exec */
        data = alloc_fdmem(max_t(unsigned int,
                                 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
        if (!data)
                goto out_arr;
        fdt->open_fds = (fd_set *)data;
        data += nr / BITS_PER_BYTE;
        fdt->close_on_exec = (fd_set *)data;
        INIT_RCU_HEAD(&fdt->rcu);
        fdt->next = NULL;

        return fdt;

out_arr:
        free_fdarr(fdt);
out_fdt:
        kfree(fdt);
out:
        return NULL;
}

/*
 * Expand the file descriptor table.
 * This function will allocate a new fdtable and both fd array and fdset, of
 * the given size.
 * Return <0 error code on error; 1 on successful completion.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
static int expand_fdtable(struct files_struct *files, int nr)
        __releases(files->file_lock)
        __acquires(files->file_lock)
{
        struct fdtable *new_fdt, *cur_fdt;

        spin_unlock(&files->file_lock);
        new_fdt = alloc_fdtable(nr);
        spin_lock(&files->file_lock);
        if (!new_fdt)
                return -ENOMEM;
        /*
         * Check again since another task may have expanded the fd table while
         * we dropped the lock
         */
        cur_fdt = files_fdtable(files);
        if (nr >= cur_fdt->max_fds) {
                /* Continue as planned */
                copy_fdtable(new_fdt, cur_fdt);
                rcu_assign_pointer(files->fdt, new_fdt);
                if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
                        free_fdtable(cur_fdt);
        } else {
                /* Somebody else expanded, so undo our attempt */
                free_fdarr(new_fdt);
                free_fdset(new_fdt);
                kfree(new_fdt);
        }
        return 1;
}
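
/*
 * Worked example of the alloc_fdtable() sizing logic above (illustrative
 * only, not part of the original file). On a 64-bit machine with 8-byte
 * pointers and a 4096-byte page, a request for nr = 300 goes through:
 *
 *      nr /= (1024 / sizeof(struct file *));   // 300 / 128 == 2
 *      nr = roundup_pow_of_two(nr + 1);        // roundup_pow_of_two(3) == 4
 *      nr *= (1024 / sizeof(struct file *));   // 4 * 128 == 512
 *
 * so the new table is sized for 512 fds, and the fd array occupies
 * 512 * 8 == 4096 bytes: exactly one page, which alloc_fdmem() can
 * still satisfy with kmalloc() rather than vmalloc().
 */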

/*
 * Expand files.
 * This function will expand the file structures, if the requested size exceeds
 * the current capacity and there is room for expansion.
 * Return <0 error code on error; 0 when nothing done; 1 when files were
 * expanded and execution may have blocked.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
int expand_files(struct files_struct *files, int nr)
{
        struct fdtable *fdt;

        fdt = files_fdtable(files);
        /* Do we need to expand? */
        if (nr < fdt->max_fds)
                return 0;
        /* Can we expand? */
        if (nr >= NR_OPEN)
                return -EMFILE;

        /* All good, so we try */
        return expand_fdtable(files, nr);
}

static void __devinit fdtable_defer_list_init(int cpu)
{
        struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);

        spin_lock_init(&fddef->lock);
        INIT_WORK(&fddef->wq, free_fdtable_work);
        fddef->next = NULL;
}

void __init files_defer_init(void)
{
        int i;

        for_each_possible_cpu(i)
                fdtable_defer_list_init(i);
}
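
/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to handle the tri-state return value of expand_files(). This
 * mirrors the retry loop in locate_fd() in fs/fcntl.c; the function and
 * label names here are made up for the example. A return of 1 means the
 * table was grown and file_lock was dropped along the way, so any
 * fd-table state read earlier may be stale and must be re-derived.
 */
static int example_reserve_fd(struct files_struct *files, int fd)
{
        int error;

        spin_lock(&files->file_lock);
repeat:
        error = expand_files(files, fd);
        if (error < 0)          /* hard failure, e.g. -EMFILE */
                goto out;
        if (error)              /* expanded and may have blocked: retry */
                goto repeat;
        /* fd now fits in the current fdtable; claim it under the lock */
out:
        spin_unlock(&files->file_lock);
        return error;
}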