/*
 *  linux/fs/file.c
 *
 *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
 *
 *  Manage the dynamic fd arrays in the process files_struct.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

struct fdtable_defer {
        spinlock_t lock;
        struct work_struct wq;
        struct timer_list timer;
        struct fdtable *next;
};

/*
 * We use this list to defer freeing fdtables that have vmalloced
 * sets/arrays.  By keeping a per-cpu list, we avoid having to embed
 * the work_struct in fdtable itself, which avoids a 64 byte (i386)
 * increase in this per-task structure.
 */
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);


/*
 * Allocate an fd array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
struct file ** alloc_fd_array(int num)
{
        struct file **new_fds;
        int size = num * sizeof(struct file *);

        if (size <= PAGE_SIZE)
                new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
        else
                new_fds = (struct file **) vmalloc(size);
        return new_fds;
}

void free_fd_array(struct file **array, int num)
{
        int size = num * sizeof(struct file *);

        if (!array) {
                printk(KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num);
                return;
        }

        if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
                return;
        else if (size <= PAGE_SIZE)
                kfree(array);
        else
                vfree(array);
}
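/*
 * Worked example of the size cutoff used by alloc_fd_array()/free_fd_array()
 * above (and by alloc_fdset()/free_fdset() below).  Assuming a 4K PAGE_SIZE
 * and 8-byte pointers - other configurations differ - an fd array of up to
 * 512 entries fits a single kmalloc'ed allocation; anything larger comes
 * from vmalloc() and must be freed from process context, which is what the
 * per-cpu defer list above exists for.  The helper below is only an
 * illustration and is kept out of the build.
 */
#if 0
static inline int example_fd_array_is_vmalloced(int num)
{
        return num * sizeof(struct file *) > PAGE_SIZE;
}
#endif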
static void __free_fdtable(struct fdtable *fdt)
{
        free_fdset(fdt->open_fds, fdt->max_fdset);
        free_fdset(fdt->close_on_exec, fdt->max_fdset);
        free_fd_array(fdt->fd, fdt->max_fds);
        kfree(fdt);
}

static void fdtable_timer(unsigned long data)
{
        struct fdtable_defer *fddef = (struct fdtable_defer *)data;

        spin_lock(&fddef->lock);
        /*
         * If someone already emptied the queue, return.
         */
        if (!fddef->next)
                goto out;
        if (!schedule_work(&fddef->wq))
                mod_timer(&fddef->timer, jiffies + 5);
out:
        spin_unlock(&fddef->lock);
}

static void free_fdtable_work(struct fdtable_defer *f)
{
        struct fdtable *fdt;

        spin_lock_bh(&f->lock);
        fdt = f->next;
        f->next = NULL;
        spin_unlock_bh(&f->lock);
        while (fdt) {
                struct fdtable *next = fdt->next;
                __free_fdtable(fdt);
                fdt = next;
        }
}

static void free_fdtable_rcu(struct rcu_head *rcu)
{
        struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
        int fdset_size, fdarray_size;
        struct fdtable_defer *fddef;

        BUG_ON(!fdt);
        fdset_size = fdt->max_fdset / 8;
        fdarray_size = fdt->max_fds * sizeof(struct file *);

        if (fdt->free_files) {
                /*
                 * This fdtable was embedded in the files structure
                 * and the files structure itself was getting destroyed.
                 * It is now safe to free the files structure.
                 */
                kmem_cache_free(files_cachep, fdt->free_files);
                return;
        }
        if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
                /*
                 * The fdtable was embedded, so there is nothing to free.
                 */
                return;
        }
        if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
                kfree(fdt->open_fds);
                kfree(fdt->close_on_exec);
                kfree(fdt->fd);
                kfree(fdt);
        } else {
                fddef = &get_cpu_var(fdtable_defer_list);
                spin_lock(&fddef->lock);
                fdt->next = fddef->next;
                fddef->next = fdt;
                /*
                 * vmallocs are handled from the workqueue context.
                 * If the per-cpu workqueue is running, then we
                 * defer work scheduling through a timer.
                 */
                if (!schedule_work(&fddef->wq))
                        mod_timer(&fddef->timer, jiffies + 5);
                spin_unlock(&fddef->lock);
                put_cpu_var(fdtable_defer_list);
        }
}

void free_fdtable(struct fdtable *fdt)
{
        if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
                                        fdt->max_fds > NR_OPEN_DEFAULT)
                call_rcu(&fdt->rcu, free_fdtable_rcu);
}
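/*
 * Illustrative sketch (kept out of the build): the reader side that the
 * call_rcu() above defends.  Lookups dereference files->fdt and the fd
 * array under rcu_read_lock() only, so a replaced fdtable may be freed no
 * earlier than one grace period after rcu_assign_pointer() installed its
 * successor.  This mirrors the fcheck_files() helper; the function name
 * below is hypothetical and only shows the pattern.
 */
#if 0
static struct file *example_lookup_fd(struct files_struct *files, unsigned int fd)
{
        struct file *file = NULL;
        struct fdtable *fdt;

        rcu_read_lock();
        fdt = files_fdtable(files);     /* rcu_dereference(files->fdt) */
        if (fd < fdt->max_fds)
                file = rcu_dereference(fdt->fd[fd]);
        rcu_read_unlock();
        return file;
}
#endif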
/*
 * Copy the existing fd sets and fd array into the new, larger fdtable.
 * Called with the files spinlock held for write.
 */
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
{
        int i;
        int count;

        BUG_ON(nfdt->max_fdset < fdt->max_fdset);
        BUG_ON(nfdt->max_fds < fdt->max_fds);
        /* Copy the existing tables and install the new pointers */

        i = fdt->max_fdset / (sizeof(unsigned long) * 8);
        count = (nfdt->max_fdset - fdt->max_fdset) / 8;

        /*
         * Don't copy the entire array if the current fdset is
         * not yet initialised.
         */
        if (i) {
                memcpy(nfdt->open_fds, fdt->open_fds, fdt->max_fdset/8);
                memcpy(nfdt->close_on_exec, fdt->close_on_exec,
                                                fdt->max_fdset/8);
                memset(&nfdt->open_fds->fds_bits[i], 0, count);
                memset(&nfdt->close_on_exec->fds_bits[i], 0, count);
        }

        /* Don't copy/clear the array if we are creating a new
           fd array for fork() */
        if (fdt->max_fds) {
                memcpy(nfdt->fd, fdt->fd,
                        fdt->max_fds * sizeof(struct file *));
                /* clear the remainder of the array */
                memset(&nfdt->fd[fdt->max_fds], 0,
                        (nfdt->max_fds - fdt->max_fds) *
                                        sizeof(struct file *));
        }
        nfdt->next_fd = fdt->next_fd;
}

/*
 * Allocate an fdset array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
fd_set * alloc_fdset(int num)
{
        fd_set *new_fdset;
        int size = num / 8;

        if (size <= PAGE_SIZE)
                new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
        else
                new_fdset = (fd_set *) vmalloc(size);
        return new_fdset;
}

void free_fdset(fd_set *array, int num)
{
        int size = num / 8;

        if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */
                return;
        else if (size <= PAGE_SIZE)
                kfree(array);
        else
                vfree(array);
}

static struct fdtable *alloc_fdtable(int nr)
{
        struct fdtable *fdt = NULL;
        int nfds = 0;
        fd_set *new_openset = NULL, *new_execset = NULL;
        struct file **new_fds;

        fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
        if (!fdt)
                goto out;
        memset(fdt, 0, sizeof(*fdt));

        nfds = __FD_SETSIZE;
        /* Expand to the max in easy steps */
        do {
                if (nfds < (PAGE_SIZE * 8))
                        nfds = PAGE_SIZE * 8;
                else {
                        nfds = nfds * 2;
                        if (nfds > NR_OPEN)
                                nfds = NR_OPEN;
                }
        } while (nfds <= nr);

        new_openset = alloc_fdset(nfds);
        new_execset = alloc_fdset(nfds);
        if (!new_openset || !new_execset)
                goto out;
        fdt->open_fds = new_openset;
        fdt->close_on_exec = new_execset;
        fdt->max_fdset = nfds;

        nfds = NR_OPEN_DEFAULT;
        /*
         * Expand to the max in easy steps, and keep expanding it until
         * we have enough for the requested fd array size.
         */
        do {
#if NR_OPEN_DEFAULT < 256
                if (nfds < 256)
                        nfds = 256;
                else
#endif
                if (nfds < (PAGE_SIZE / sizeof(struct file *)))
                        nfds = PAGE_SIZE / sizeof(struct file *);
                else {
                        nfds = nfds * 2;
                        if (nfds > NR_OPEN)
                                nfds = NR_OPEN;
                }
        } while (nfds <= nr);
        new_fds = alloc_fd_array(nfds);
        if (!new_fds)
                goto out2;
        fdt->fd = new_fds;
        fdt->max_fds = nfds;
        fdt->free_files = NULL;
        return fdt;
out2:
        /* nfds was reused for the fd array; the fdsets were sized max_fdset */
        nfds = fdt->max_fdset;
out:
        if (new_openset)
                free_fdset(new_openset, nfds);
        if (new_execset)
                free_fdset(new_execset, nfds);
        kfree(fdt);
        return NULL;
}
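/*
 * Worked example of the sizing above, assuming a 4K PAGE_SIZE, 8-byte
 * pointers, NR_OPEN of 1024*1024 and a 64-bit NR_OPEN_DEFAULT (other
 * configurations will give different numbers):
 *
 *   fdsets:   __FD_SETSIZE (1024) -> PAGE_SIZE*8 (32768), then doubling
 *             until nfds > nr, capped at NR_OPEN.
 *   fd array: NR_OPEN_DEFAULT (64) -> 256 -> PAGE_SIZE/sizeof(struct file *)
 *             (512) -> 1024 -> ..., doubling until nfds > nr, capped at
 *             NR_OPEN.
 *
 * So asking alloc_fdtable() for fd 600 yields max_fdset == 32768 and
 * max_fds == 1024.
 */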
/*
 * Expands the file descriptor table - it will allocate a new fdtable and
 * both fd array and fdset.  It is expected to be called with the
 * files_lock held.
 */
static int expand_fdtable(struct files_struct *files, int nr)
        __releases(files->file_lock)
        __acquires(files->file_lock)
{
        int error = 0;
        struct fdtable *fdt;
        struct fdtable *nfdt = NULL;

        spin_unlock(&files->file_lock);
        nfdt = alloc_fdtable(nr);
        if (!nfdt) {
                error = -ENOMEM;
                spin_lock(&files->file_lock);
                goto out;
        }

        spin_lock(&files->file_lock);
        fdt = files_fdtable(files);
        /*
         * Check again since another task may have expanded the
         * fd table while we dropped the lock
         */
        if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
                copy_fdtable(nfdt, fdt);
        } else {
                /* Somebody expanded while we dropped file_lock */
                spin_unlock(&files->file_lock);
                __free_fdtable(nfdt);
                spin_lock(&files->file_lock);
                goto out;
        }
        rcu_assign_pointer(files->fdt, nfdt);
        free_fdtable(fdt);
out:
        return error;
}

/*
 * Expand files.
 * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked.
 * Should be called with the files->file_lock spinlock held for write.
 */
int expand_files(struct files_struct *files, int nr)
{
        int err, expand = 0;
        struct fdtable *fdt;

        fdt = files_fdtable(files);
        if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
                if (fdt->max_fdset >= NR_OPEN ||
                        fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
                        err = -EMFILE;
                        goto out;
                }
                expand = 1;
                if ((err = expand_fdtable(files, nr)))
                        goto out;
        }
        err = expand;
out:
        return err;
}

static void __devinit fdtable_defer_list_init(int cpu)
{
        struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
        spin_lock_init(&fddef->lock);
        INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
        init_timer(&fddef->timer);
        fddef->timer.data = (unsigned long)fddef;
        fddef->timer.function = fdtable_timer;
        fddef->next = NULL;
}

void __init files_defer_init(void)
{
        int i;
        /* Really early - can't use for_each_cpu */
        for (i = 0; i < NR_CPUS; i++)
                fdtable_defer_list_init(i);
}
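/*
 * Illustrative sketch (kept out of the build): how a descriptor-allocating
 * caller such as get_unused_fd() in fs/open.c is expected to drive
 * expand_files().  The tri-state return matters: a positive value means the
 * table was grown and file_lock was dropped in the meantime, so the
 * free-slot search has to be redone.  The function name below is
 * hypothetical; it only makes that contract concrete.
 */
#if 0
static int example_alloc_fd(struct files_struct *files)
{
        struct fdtable *fdt;
        int fd, error;

        spin_lock(&files->file_lock);
repeat:
        fdt = files_fdtable(files);
        fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fdset,
                                fdt->next_fd);

        error = expand_files(files, fd);
        if (error < 0)          /* would exceed NR_OPEN: -EMFILE */
                goto out;
        if (error)              /* expanded (and may have blocked): search again */
                goto repeat;

        FD_SET(fd, fdt->open_fds);
        fdt->next_fd = fd + 1;
        error = fd;
out:
        spin_unlock(&files->file_lock);
        return error;
}
#endif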