fs/eventpoll.c: side-by-side diff of 7fbb8759eff9a348efa5f352ffaa51c364837c4b (old) vs. 7ef9964e6d1b911b78709f144000aacadd0ebc21 (new)
1/*
2 * fs/eventpoll.c (Efficient event polling implementation)
3 * Copyright (C) 2001,...,2007 Davide Libenzi
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 88 unchanged lines hidden ---

97
98/* Maximum msec timeout value storable in a long int */
99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
100
101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
102
103#define EP_UNACTIVE_PTR ((void *) -1L)
104
1/*
2 * fs/eventpoll.c (Efficient event polling implementation)
3 * Copyright (C) 2001,...,2007 Davide Libenzi
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 88 unchanged lines hidden ---

97
98/* Maximum msec timeout value storable in a long int */
99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
100
101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
102
103#define EP_UNACTIVE_PTR ((void *) -1L)
104
105#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
106
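
The two limits defined just above keep later arithmetic in range: EP_MAX_EVENTS caps the maxevents argument of epoll_wait() so that maxevents * sizeof(struct epoll_event), the byte size of the user-supplied event buffer, cannot overflow an int, and EP_MAX_MSTIMEO caps a millisecond timeout so that its conversion to jiffies still fits in a long. The checks themselves live in the epoll_wait() path among the lines hidden above; the sketch below is illustrative only (the helper name and exact policy are assumptions, not code from this file):

/*
 * Illustrative sketch only -- the real checks live in the epoll_wait()
 * path hidden above; the helper name and exact policy are assumptions.
 * Returns a timeout in jiffies, or a negative errno.
 */
static long ep_sanitize_wait_args(int maxevents, long ms_timeout)
{
	/* Reject counts whose event-buffer byte size would overflow an int. */
	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/*
	 * Timeouts that are negative or too large to convert safely are
	 * treated as "wait forever"; otherwise convert ms to jiffies,
	 * rounding up. The EP_MAX_MSTIMEO bound is what keeps the
	 * multiplication below from overflowing a long.
	 */
	if (ms_timeout < 0 || ms_timeout >= EP_MAX_MSTIMEO)
		return MAX_SCHEDULE_TIMEOUT;
	return (ms_timeout * HZ + 999) / 1000;
}
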
105struct epoll_filefd {
106 struct file *file;
107 int fd;
108};
109
110/*
111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
112 * It is used to keep track of all tasks that are currently inside the wake_up() code

--- 82 unchanged lines hidden ---

195 struct rb_root rbr;
196
197 /*
198 * This is a singly linked list that chains all the "struct epitem" that
199 * happened while transferring ready events to userspace w/out
200 * holding ->lock.
201 */
202 struct epitem *ovflist;
107struct epoll_filefd {
108 struct file *file;
109 int fd;
110};
111
112/*
113 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
114 * It is used to keep track of all tasks that are currently inside the wake_up() code

--- 82 unchanged lines hidden ---

197 struct rb_root rbr;
198
199 /*
200 * This is a singly linked list that chains all the "struct epitem" that
201 * happened while transferring ready events to userspace w/out
202 * holding ->lock.
203 */
204 struct epitem *ovflist;
205
206 /* The user that created the eventpoll descriptor */
207 struct user_struct *user;
203};
204
205/* Wait structure used by the poll hooks */
206struct eppoll_entry {
207 /* List header used to link this structure to the "struct epitem" */
208 struct list_head llink;
209
210 /* The "base" pointer is set to the container "struct epitem" */

--- 11 unchanged lines hidden ---

222
223/* Wrapper struct used by poll queueing */
224struct ep_pqueue {
225 poll_table pt;
226 struct epitem *epi;
227};
228
229/*
208};
209
210/* Wait structure used by the poll hooks */
211struct eppoll_entry {
212 /* List header used to link this structure to the "struct epitem" */
213 struct list_head llink;
214
215 /* The "base" pointer is set to the container "struct epitem" */

--- 11 unchanged lines hidden ---

227
228/* Wrapper struct used by poll queueing */
229struct ep_pqueue {
230 poll_table pt;
231 struct epitem *epi;
232};
233
234/*
235 * Configuration options available inside /proc/sys/fs/epoll/
236 */
237/* Maximum number of epoll devices, per user */
238static int max_user_instances __read_mostly;
239/* Maximum number of epoll watched descriptors, per user */
240static int max_user_watches __read_mostly;
241
242/*
230 * This mutex is used to serialize ep_free() and eventpoll_release_file().
231 */
243 * This mutex is used to serialize ep_free() and eventpoll_release_file().
244 */
232static struct mutex epmutex;
245static DEFINE_MUTEX(epmutex);
233
234/* Safe wake up implementation */
235static struct poll_safewake psw;
236
237/* Slab cache used to allocate "struct epitem" */
238static struct kmem_cache *epi_cache __read_mostly;
239
240/* Slab cache used to allocate "struct eppoll_entry" */
241static struct kmem_cache *pwq_cache __read_mostly;
242
246
247/* Safe wake up implementation */
248static struct poll_safewake psw;
249
250/* Slab cache used to allocate "struct epitem" */
251static struct kmem_cache *epi_cache __read_mostly;
252
253/* Slab cache used to allocate "struct eppoll_entry" */
254static struct kmem_cache *pwq_cache __read_mostly;
255
256#ifdef CONFIG_SYSCTL
243
257
258#include <linux/sysctl.h>
259
260static int zero;
261
262ctl_table epoll_table[] = {
263 {
264 .procname = "max_user_instances",
265 .data = &max_user_instances,
266 .maxlen = sizeof(int),
267 .mode = 0644,
268 .proc_handler = &proc_dointvec_minmax,
269 .extra1 = &zero,
270 },
271 {
272 .procname = "max_user_watches",
273 .data = &max_user_watches,
274 .maxlen = sizeof(int),
275 .mode = 0644,
276 .proc_handler = &proc_dointvec_minmax,
277 .extra1 = &zero,
278 },
279 { .ctl_name = 0 }
280};
281#endif /* CONFIG_SYSCTL */
282
283
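
With CONFIG_SYSCTL, the two knobs above appear as /proc/sys/fs/epoll/max_user_instances and /proc/sys/fs/epoll/max_user_watches. When the limits are exceeded, epoll_create() fails with EMFILE (enforced in ep_alloc() below) and EPOLL_CTL_ADD fails with ENOSPC (enforced in ep_insert()). A minimal user-space probe might look like the following; the file paths follow from the .procname fields above, while the helper name and messages are made up for illustration:

#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>

/* Read one of the /proc/sys/fs/epoll/ limits added by this change. */
static long read_epoll_limit(const char *name)
{
	char path[64];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/fs/epoll/%s", name);
	f = fopen(path, "r");
	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	int epfd = epoll_create(1);

	if (epfd < 0 && errno == EMFILE)
		/* EMFILE may also mean the process fd table is full. */
		fprintf(stderr, "epoll instance limit? max_user_instances=%ld\n",
			read_epoll_limit("max_user_instances"));
	else if (epfd >= 0)
		printf("per-user watch limit: max_user_watches=%ld\n",
		       read_epoll_limit("max_user_watches"));
	return 0;
}
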
244/* Setup the structure that is used as key for the RB tree */
245static inline void ep_set_ffd(struct epoll_filefd *ffd,
246 struct file *file, int fd)
247{
248 ffd->file = file;
249 ffd->fd = fd;
250}
251

--- 145 unchanged lines hidden ---

397 spin_lock_irqsave(&ep->lock, flags);
398 if (ep_is_linked(&epi->rdllink))
399 list_del_init(&epi->rdllink);
400 spin_unlock_irqrestore(&ep->lock, flags);
401
402 /* At this point it is safe to free the eventpoll item */
403 kmem_cache_free(epi_cache, epi);
404
284/* Setup the structure that is used as key for the RB tree */
285static inline void ep_set_ffd(struct epoll_filefd *ffd,
286 struct file *file, int fd)
287{
288 ffd->file = file;
289 ffd->fd = fd;
290}
291

--- 145 unchanged lines hidden ---

437 spin_lock_irqsave(&ep->lock, flags);
438 if (ep_is_linked(&epi->rdllink))
439 list_del_init(&epi->rdllink);
440 spin_unlock_irqrestore(&ep->lock, flags);
441
442 /* At this point it is safe to free the eventpoll item */
443 kmem_cache_free(epi_cache, epi);
444
445 atomic_dec(&ep->user->epoll_watches);
446
405 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
406 current, ep, file));
407
408 return 0;
409}
410
411static void ep_free(struct eventpoll *ep)
412{

--- 31 unchanged lines hidden ---

444 */
445 while ((rbp = rb_first(&ep->rbr)) != NULL) {
446 epi = rb_entry(rbp, struct epitem, rbn);
447 ep_remove(ep, epi);
448 }
449
450 mutex_unlock(&epmutex);
451 mutex_destroy(&ep->mtx);
447 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
448 current, ep, file));
449
450 return 0;
451}
452
453static void ep_free(struct eventpoll *ep)
454{

--- 31 unchanged lines hidden ---

486 */
487 while ((rbp = rb_first(&ep->rbr)) != NULL) {
488 epi = rb_entry(rbp, struct epitem, rbn);
489 ep_remove(ep, epi);
490 }
491
492 mutex_unlock(&epmutex);
493 mutex_destroy(&ep->mtx);
494 atomic_dec(&ep->user->epoll_devs);
495 free_uid(ep->user);
452 kfree(ep);
453}
454
455static int ep_eventpoll_release(struct inode *inode, struct file *file)
456{
457 struct eventpoll *ep = file->private_data;
458
459 if (ep)

--- 67 unchanged lines hidden ---

527 mutex_unlock(&ep->mtx);
528 }
529
530 mutex_unlock(&epmutex);
531}
532
533static int ep_alloc(struct eventpoll **pep)
534{
496 kfree(ep);
497}
498
499static int ep_eventpoll_release(struct inode *inode, struct file *file)
500{
501 struct eventpoll *ep = file->private_data;
502
503 if (ep)

--- 67 unchanged lines hidden ---

571 mutex_unlock(&ep->mtx);
572 }
573
574 mutex_unlock(&epmutex);
575}
576
577static int ep_alloc(struct eventpoll **pep)
578{
535 struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
579 int error;
580 struct user_struct *user;
581 struct eventpoll *ep;
536
582
537 if (!ep)
538 return -ENOMEM;
583 user = get_current_user();
584 error = -EMFILE;
585 if (unlikely(atomic_read(&user->epoll_devs) >=
586 max_user_instances))
587 goto free_uid;
588 error = -ENOMEM;
589 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
590 if (unlikely(!ep))
591 goto free_uid;
539
540 spin_lock_init(&ep->lock);
541 mutex_init(&ep->mtx);
542 init_waitqueue_head(&ep->wq);
543 init_waitqueue_head(&ep->poll_wait);
544 INIT_LIST_HEAD(&ep->rdllist);
545 ep->rbr = RB_ROOT;
546 ep->ovflist = EP_UNACTIVE_PTR;
592
593 spin_lock_init(&ep->lock);
594 mutex_init(&ep->mtx);
595 init_waitqueue_head(&ep->wq);
596 init_waitqueue_head(&ep->poll_wait);
597 INIT_LIST_HEAD(&ep->rdllist);
598 ep->rbr = RB_ROOT;
599 ep->ovflist = EP_UNACTIVE_PTR;
600 ep->user = user;
547
548 *pep = ep;
549
550 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
551 current, ep));
552 return 0;
601
602 *pep = ep;
603
604 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
605 current, ep));
606 return 0;
607
608free_uid:
609 free_uid(user);
610 return error;
553}
554
555/*
556 * Search the file inside the eventpoll tree. The RB tree operations
557 * are protected by the "mtx" mutex, and ep_find() must be called with
558 * "mtx" held.
559 */
560static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)

--- 137 unchanged lines hidden ---

698static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
699 struct file *tfile, int fd)
700{
701 int error, revents, pwake = 0;
702 unsigned long flags;
703 struct epitem *epi;
704 struct ep_pqueue epq;
705
611}
612
613/*
614 * Search the file inside the eventpoll tree. The RB tree operations
615 * are protected by the "mtx" mutex, and ep_find() must be called with
616 * "mtx" held.
617 */
618static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)

--- 137 unchanged lines hidden ---

756static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
757 struct file *tfile, int fd)
758{
759 int error, revents, pwake = 0;
760 unsigned long flags;
761 struct epitem *epi;
762 struct ep_pqueue epq;
763
706 error = -ENOMEM;
764 if (unlikely(atomic_read(&ep->user->epoll_watches) >=
765 max_user_watches))
766 return -ENOSPC;
707 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
767 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
708 goto error_return;
768 return -ENOMEM;
709
710 /* Item initialization follow here ... */
711 INIT_LIST_HEAD(&epi->rdllink);
712 INIT_LIST_HEAD(&epi->fllink);
713 INIT_LIST_HEAD(&epi->pwqlist);
714 epi->ep = ep;
715 ep_set_ffd(&epi->ffd, tfile, fd);
716 epi->event = *event;

--- 13 unchanged lines hidden ---

730 */
731 revents = tfile->f_op->poll(tfile, &epq.pt);
732
733 /*
734 * We have to check if something went wrong during the poll wait queue
735 * install process. Namely, an allocation for a wait queue failed due to
736 * high memory pressure.
737 */
769
770 /* Item initialization follows here ... */
771 INIT_LIST_HEAD(&epi->rdllink);
772 INIT_LIST_HEAD(&epi->fllink);
773 INIT_LIST_HEAD(&epi->pwqlist);
774 epi->ep = ep;
775 ep_set_ffd(&epi->ffd, tfile, fd);
776 epi->event = *event;

--- 13 unchanged lines hidden ---

790 */
791 revents = tfile->f_op->poll(tfile, &epq.pt);
792
793 /*
794 * We have to check if something went wrong during the poll wait queue
795 * install process. Namely, an allocation for a wait queue failed due to
796 * high memory pressure.
797 */
798 error = -ENOMEM;
738 if (epi->nwait < 0)
739 goto error_unregister;
740
741 /* Add the current item to the list of active epoll hooks for this file */
742 spin_lock(&tfile->f_ep_lock);
743 list_add_tail(&epi->fllink, &tfile->f_ep_links);
744 spin_unlock(&tfile->f_ep_lock);
745

--- 14 unchanged lines hidden ---

760 if (waitqueue_active(&ep->wq))
761 wake_up_locked(&ep->wq);
762 if (waitqueue_active(&ep->poll_wait))
763 pwake++;
764 }
765
766 spin_unlock_irqrestore(&ep->lock, flags);
767
799 if (epi->nwait < 0)
800 goto error_unregister;
801
802 /* Add the current item to the list of active epoll hooks for this file */
803 spin_lock(&tfile->f_ep_lock);
804 list_add_tail(&epi->fllink, &tfile->f_ep_links);
805 spin_unlock(&tfile->f_ep_lock);
806

--- 14 unchanged lines hidden ---

821 if (waitqueue_active(&ep->wq))
822 wake_up_locked(&ep->wq);
823 if (waitqueue_active(&ep->poll_wait))
824 pwake++;
825 }
826
827 spin_unlock_irqrestore(&ep->lock, flags);
828
829 atomic_inc(&ep->user->epoll_watches);
830
768 /* We have to call this outside the lock */
769 if (pwake)
770 ep_poll_safewake(&psw, &ep->poll_wait);
771
772 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
773 current, ep, tfile, fd));
774
775 return 0;

--- 8 unchanged lines hidden ---

784 * And ep_insert() is called with "mtx" held.
785 */
786 spin_lock_irqsave(&ep->lock, flags);
787 if (ep_is_linked(&epi->rdllink))
788 list_del_init(&epi->rdllink);
789 spin_unlock_irqrestore(&ep->lock, flags);
790
791 kmem_cache_free(epi_cache, epi);
831 /* We have to call this outside the lock */
832 if (pwake)
833 ep_poll_safewake(&psw, &ep->poll_wait);
834
835 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
836 current, ep, tfile, fd));
837
838 return 0;

--- 8 unchanged lines hidden ---

847 * And ep_insert() is called with "mtx" held.
848 */
849 spin_lock_irqsave(&ep->lock, flags);
850 if (ep_is_linked(&epi->rdllink))
851 list_del_init(&epi->rdllink);
852 spin_unlock_irqrestore(&ep->lock, flags);
853
854 kmem_cache_free(epi_cache, epi);
792error_return:
855
793 return error;
794}
795
796/*
797 * Modify the interest event mask by dropping an event if the new mask
798 * has a match in the current file status. Must be called with "mtx" held.
799 */
800static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)

--- 272 unchanged lines hidden ---

1073 /*
1074 * Creates all the items needed to setup an eventpoll file. That is,
1075 * a file structure and a free file descriptor.
1076 */
1077 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1078 flags & O_CLOEXEC);
1079 if (fd < 0)
1080 ep_free(ep);
856 return error;
857}
858
859/*
860 * Modify the interest event mask by dropping an event if the new mask
861 * has a match in the current file status. Must be called with "mtx" held.
862 */
863static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)

--- 272 unchanged lines hidden ---

1136 /*
1137 * Creates all the items needed to setup an eventpoll file. That is,
1138 * a file structure and a free file descriptor.
1139 */
1140 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1141 flags & O_CLOEXEC);
1142 if (fd < 0)
1143 ep_free(ep);
1144 atomic_inc(&ep->user->epoll_devs);
1081
1082error_return:
1083 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1084 current, flags, fd));
1085
1086 return fd;
1087}
1088

--- 205 unchanged lines hidden ---

1294
1295 return error;
1296}
1297
1298#endif /* HAVE_SET_RESTORE_SIGMASK */
1299
1300static int __init eventpoll_init(void)
1301{
1145
1146error_return:
1147 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1148 current, flags, fd));
1149
1150 return fd;
1151}
1152

--- 205 unchanged lines hidden ---

1358
1359 return error;
1360}
1361
1362#endif /* HAVE_SET_RESTORE_SIGMASK */
1363
1364static int __init eventpoll_init(void)
1365{
1302 mutex_init(&epmutex);
1366 struct sysinfo si;
1303
1367
1368 si_meminfo(&si);
1369 max_user_instances = 128;
1370 max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) /
1371 EP_ITEM_COST;
1372
1304 /* Initialize the structure used to perform safe poll wait head wake ups */
1305 ep_poll_safewake_init(&psw);
1306
1307 /* Allocates slab cache used to allocate "struct epitem" items */
1308 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1309 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC,
1310 NULL);
1311
1312 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1313 pwq_cache = kmem_cache_create("eventpoll_pwq",
1314 sizeof(struct eppoll_entry), 0,
1315 EPI_SLAB_DEBUG|SLAB_PANIC, NULL);
1316
1317 return 0;
1318}
1319fs_initcall(eventpoll_init);
1373 /* Initialize the structure used to perform safe poll wait head wake ups */
1374 ep_poll_safewake_init(&psw);
1375
1376 /* Allocates slab cache used to allocate "struct epitem" items */
1377 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1378 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC,
1379 NULL);
1380
1381 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1382 pwq_cache = kmem_cache_create("eventpoll_pwq",
1383 sizeof(struct eppoll_entry), 0,
1384 EPI_SLAB_DEBUG|SLAB_PANIC, NULL);
1385
1386 return 0;
1387}
1388fs_initcall(eventpoll_init);
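
For scale, the default sizing above gives each user roughly 1/32 of low memory worth of watches: max_user_watches = ((totalram - totalhigh) / 32 pages) * PAGE_SIZE / EP_ITEM_COST, where EP_ITEM_COST is sizeof(struct epitem) + sizeof(struct eppoll_entry). A rough worked example, using assumed numbers (4 KiB pages, 1 GiB of low memory, and ~160 bytes for EP_ITEM_COST on a 64-bit build; the struct sizes are assumptions, not values taken from this file):

/*
 * Rough worked example with assumed numbers, for illustration only:
 *   low memory (totalram - totalhigh): 1 GiB = 262144 pages of 4 KiB
 *   budget (1/32 of low memory)      : 262144 / 32 = 8192 pages = 32 MiB
 *   EP_ITEM_COST                     : ~160 bytes (assumed sizeof(struct epitem)
 *                                      + sizeof(struct eppoll_entry), 64-bit)
 *   max_user_watches                 : 32 MiB / 160 bytes ~= 209715 watches/user
 * max_user_instances, by contrast, simply defaults to 128 regardless of RAM.
 */
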