1 /* 2 * linux/fs/file.c 3 * 4 * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes 5 * 6 * Manage the dynamic fd arrays in the process files_struct. 7 */ 8 9 #include <linux/fs.h> 10 #include <linux/mm.h> 11 #include <linux/time.h> 12 #include <linux/slab.h> 13 #include <linux/vmalloc.h> 14 #include <linux/file.h> 15 #include <linux/bitops.h> 16 #include <linux/interrupt.h> 17 #include <linux/spinlock.h> 18 #include <linux/rcupdate.h> 19 #include <linux/workqueue.h> 20 21 struct fdtable_defer { 22 spinlock_t lock; 23 struct work_struct wq; 24 struct timer_list timer; 25 struct fdtable *next; 26 }; 27 28 /* 29 * We use this list to defer free fdtables that have vmalloced 30 * sets/arrays. By keeping a per-cpu list, we avoid having to embed 31 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in 32 * this per-task structure. 33 */ 34 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); 35 36 37 /* 38 * Allocate an fd array, using kmalloc or vmalloc. 39 * Note: the array isn't cleared at allocation time. 40 */ 41 struct file ** alloc_fd_array(int num) 42 { 43 struct file **new_fds; 44 int size = num * sizeof(struct file *); 45 46 if (size <= PAGE_SIZE) 47 new_fds = (struct file **) kmalloc(size, GFP_KERNEL); 48 else 49 new_fds = (struct file **) vmalloc(size); 50 return new_fds; 51 } 52 53 void free_fd_array(struct file **array, int num) 54 { 55 int size = num * sizeof(struct file *); 56 57 if (!array) { 58 printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num); 59 return; 60 } 61 62 if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ 63 return; 64 else if (size <= PAGE_SIZE) 65 kfree(array); 66 else 67 vfree(array); 68 } 69 70 static void __free_fdtable(struct fdtable *fdt) 71 { 72 free_fdset(fdt->open_fds, fdt->max_fdset); 73 free_fdset(fdt->close_on_exec, fdt->max_fdset); 74 free_fd_array(fdt->fd, fdt->max_fds); 75 kfree(fdt); 76 } 77 78 static void fdtable_timer(unsigned long data) 79 { 80 struct fdtable_defer *fddef = (struct fdtable_defer *)data; 81 82 spin_lock(&fddef->lock); 83 /* 84 * If someone already emptied the queue return. 85 */ 86 if (!fddef->next) 87 goto out; 88 if (!schedule_work(&fddef->wq)) 89 mod_timer(&fddef->timer, 5); 90 out: 91 spin_unlock(&fddef->lock); 92 } 93 94 static void free_fdtable_work(struct fdtable_defer *f) 95 { 96 struct fdtable *fdt; 97 98 spin_lock_bh(&f->lock); 99 fdt = f->next; 100 f->next = NULL; 101 spin_unlock_bh(&f->lock); 102 while(fdt) { 103 struct fdtable *next = fdt->next; 104 __free_fdtable(fdt); 105 fdt = next; 106 } 107 } 108 109 static void free_fdtable_rcu(struct rcu_head *rcu) 110 { 111 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 112 int fdset_size, fdarray_size; 113 struct fdtable_defer *fddef; 114 115 BUG_ON(!fdt); 116 fdset_size = fdt->max_fdset / 8; 117 fdarray_size = fdt->max_fds * sizeof(struct file *); 118 119 if (fdt->free_files) { 120 /* 121 * The this fdtable was embedded in the files structure 122 * and the files structure itself was getting destroyed. 123 * It is now safe to free the files structure. 124 */ 125 kmem_cache_free(files_cachep, fdt->free_files); 126 return; 127 } 128 if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE && 129 fdt->max_fds <= NR_OPEN_DEFAULT) { 130 /* 131 * The fdtable was embedded 132 */ 133 return; 134 } 135 if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { 136 kfree(fdt->open_fds); 137 kfree(fdt->close_on_exec); 138 kfree(fdt->fd); 139 kfree(fdt); 140 } else { 141 fddef = &get_cpu_var(fdtable_defer_list); 142 spin_lock(&fddef->lock); 143 fdt->next = fddef->next; 144 fddef->next = fdt; 145 /* 146 * vmallocs are handled from the workqueue context. 147 * If the per-cpu workqueue is running, then we 148 * defer work scheduling through a timer. 149 */ 150 if (!schedule_work(&fddef->wq)) 151 mod_timer(&fddef->timer, 5); 152 spin_unlock(&fddef->lock); 153 put_cpu_var(fdtable_defer_list); 154 } 155 } 156 157 void free_fdtable(struct fdtable *fdt) 158 { 159 if (fdt->free_files || 160 fdt->max_fdset > EMBEDDED_FD_SET_SIZE || 161 fdt->max_fds > NR_OPEN_DEFAULT) 162 call_rcu(&fdt->rcu, free_fdtable_rcu); 163 } 164 165 /* 166 * Expand the fdset in the files_struct. Called with the files spinlock 167 * held for write. 168 */ 169 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) 170 { 171 int i; 172 int count; 173 174 BUG_ON(nfdt->max_fdset < fdt->max_fdset); 175 BUG_ON(nfdt->max_fds < fdt->max_fds); 176 /* Copy the existing tables and install the new pointers */ 177 178 i = fdt->max_fdset / (sizeof(unsigned long) * 8); 179 count = (nfdt->max_fdset - fdt->max_fdset) / 8; 180 181 /* 182 * Don't copy the entire array if the current fdset is 183 * not yet initialised. 184 */ 185 if (i) { 186 memcpy (nfdt->open_fds, fdt->open_fds, 187 fdt->max_fdset/8); 188 memcpy (nfdt->close_on_exec, fdt->close_on_exec, 189 fdt->max_fdset/8); 190 memset (&nfdt->open_fds->fds_bits[i], 0, count); 191 memset (&nfdt->close_on_exec->fds_bits[i], 0, count); 192 } 193 194 /* Don't copy/clear the array if we are creating a new 195 fd array for fork() */ 196 if (fdt->max_fds) { 197 memcpy(nfdt->fd, fdt->fd, 198 fdt->max_fds * sizeof(struct file *)); 199 /* clear the remainder of the array */ 200 memset(&nfdt->fd[fdt->max_fds], 0, 201 (nfdt->max_fds - fdt->max_fds) * 202 sizeof(struct file *)); 203 } 204 } 205 206 /* 207 * Allocate an fdset array, using kmalloc or vmalloc. 208 * Note: the array isn't cleared at allocation time. 209 */ 210 fd_set * alloc_fdset(int num) 211 { 212 fd_set *new_fdset; 213 int size = num / 8; 214 215 if (size <= PAGE_SIZE) 216 new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); 217 else 218 new_fdset = (fd_set *) vmalloc(size); 219 return new_fdset; 220 } 221 222 void free_fdset(fd_set *array, int num) 223 { 224 if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */ 225 return; 226 else if (num <= 8 * PAGE_SIZE) 227 kfree(array); 228 else 229 vfree(array); 230 } 231 232 static struct fdtable *alloc_fdtable(int nr) 233 { 234 struct fdtable *fdt = NULL; 235 int nfds = 0; 236 fd_set *new_openset = NULL, *new_execset = NULL; 237 struct file **new_fds; 238 239 fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); 240 if (!fdt) 241 goto out; 242 243 nfds = 8 * L1_CACHE_BYTES; 244 /* Expand to the max in easy steps */ 245 while (nfds <= nr) { 246 nfds = nfds * 2; 247 if (nfds > NR_OPEN) 248 nfds = NR_OPEN; 249 } 250 251 new_openset = alloc_fdset(nfds); 252 new_execset = alloc_fdset(nfds); 253 if (!new_openset || !new_execset) 254 goto out; 255 fdt->open_fds = new_openset; 256 fdt->close_on_exec = new_execset; 257 fdt->max_fdset = nfds; 258 259 nfds = NR_OPEN_DEFAULT; 260 /* 261 * Expand to the max in easy steps, and keep expanding it until 262 * we have enough for the requested fd array size. 263 */ 264 do { 265 #if NR_OPEN_DEFAULT < 256 266 if (nfds < 256) 267 nfds = 256; 268 else 269 #endif 270 if (nfds < (PAGE_SIZE / sizeof(struct file *))) 271 nfds = PAGE_SIZE / sizeof(struct file *); 272 else { 273 nfds = nfds * 2; 274 if (nfds > NR_OPEN) 275 nfds = NR_OPEN; 276 } 277 } while (nfds <= nr); 278 new_fds = alloc_fd_array(nfds); 279 if (!new_fds) 280 goto out; 281 fdt->fd = new_fds; 282 fdt->max_fds = nfds; 283 fdt->free_files = NULL; 284 return fdt; 285 out: 286 if (new_openset) 287 free_fdset(new_openset, nfds); 288 if (new_execset) 289 free_fdset(new_execset, nfds); 290 kfree(fdt); 291 return NULL; 292 } 293 294 /* 295 * Expands the file descriptor table - it will allocate a new fdtable and 296 * both fd array and fdset. It is expected to be called with the 297 * files_lock held. 298 */ 299 static int expand_fdtable(struct files_struct *files, int nr) 300 __releases(files->file_lock) 301 __acquires(files->file_lock) 302 { 303 int error = 0; 304 struct fdtable *fdt; 305 struct fdtable *nfdt = NULL; 306 307 spin_unlock(&files->file_lock); 308 nfdt = alloc_fdtable(nr); 309 if (!nfdt) { 310 error = -ENOMEM; 311 spin_lock(&files->file_lock); 312 goto out; 313 } 314 315 spin_lock(&files->file_lock); 316 fdt = files_fdtable(files); 317 /* 318 * Check again since another task may have expanded the 319 * fd table while we dropped the lock 320 */ 321 if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { 322 copy_fdtable(nfdt, fdt); 323 } else { 324 /* Somebody expanded while we dropped file_lock */ 325 spin_unlock(&files->file_lock); 326 __free_fdtable(nfdt); 327 spin_lock(&files->file_lock); 328 goto out; 329 } 330 rcu_assign_pointer(files->fdt, nfdt); 331 free_fdtable(fdt); 332 out: 333 return error; 334 } 335 336 /* 337 * Expand files. 338 * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked. 339 * Should be called with the files->file_lock spinlock held for write. 340 */ 341 int expand_files(struct files_struct *files, int nr) 342 { 343 int err, expand = 0; 344 struct fdtable *fdt; 345 346 fdt = files_fdtable(files); 347 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { 348 if (fdt->max_fdset >= NR_OPEN || 349 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { 350 err = -EMFILE; 351 goto out; 352 } 353 expand = 1; 354 if ((err = expand_fdtable(files, nr))) 355 goto out; 356 } 357 err = expand; 358 out: 359 return err; 360 } 361 362 static void __devinit fdtable_defer_list_init(int cpu) 363 { 364 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 365 spin_lock_init(&fddef->lock); 366 INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); 367 init_timer(&fddef->timer); 368 fddef->timer.data = (unsigned long)fddef; 369 fddef->timer.function = fdtable_timer; 370 fddef->next = NULL; 371 } 372 373 void __init files_defer_init(void) 374 { 375 int i; 376 for_each_possible_cpu(i) 377 fdtable_defer_list_init(i); 378 } 379