eventpoll.c before (7fbb8759eff9a348efa5f352ffaa51c364837c4b) | eventpoll.c after (7ef9964e6d1b911b78709f144000aacadd0ebc21) |
---|---|
1/* 2 * fs/eventpoll.c (Efficent event polling implementation) 3 * Copyright (C) 2001,...,2007 Davide Libenzi 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. --- 88 unchanged lines hidden (view full) --- 97 98/* Maximum msec timeout value storeable in a long int */ 99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) 100 101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 102 103#define EP_UNACTIVE_PTR ((void *) -1L) 104 | 1/* 2 * fs/eventpoll.c (Efficent event polling implementation) 3 * Copyright (C) 2001,...,2007 Davide Libenzi 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. --- 88 unchanged lines hidden (view full) --- 97 98/* Maximum msec timeout value storeable in a long int */ 99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) 100 101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 102 103#define EP_UNACTIVE_PTR ((void *) -1L) 104 |
105#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) 106 |
|
105struct epoll_filefd { 106 struct file *file; 107 int fd; 108}; 109 110/* 111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 112 * It is used to keep track on all tasks that are currently inside the wake_up() code --- 82 unchanged lines hidden (view full) --- 195 struct rb_root rbr; 196 197 /* 198 * This is a single linked list that chains all the "struct epitem" that 199 * happened while transfering ready events to userspace w/out 200 * holding ->lock. 201 */ 202 struct epitem *ovflist; | 107struct epoll_filefd { 108 struct file *file; 109 int fd; 110}; 111 112/* 113 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 114 * It is used to keep track on all tasks that are currently inside the wake_up() code --- 82 unchanged lines hidden (view full) --- 197 struct rb_root rbr; 198 199 /* 200 * This is a single linked list that chains all the "struct epitem" that 201 * happened while transfering ready events to userspace w/out 202 * holding ->lock. 203 */ 204 struct epitem *ovflist; |
205 206 /* The user that created the eventpoll descriptor */ 207 struct user_struct *user; |
|
203}; 204 205/* Wait structure used by the poll hooks */ 206struct eppoll_entry { 207 /* List header used to link this structure to the "struct epitem" */ 208 struct list_head llink; 209 210 /* The "base" pointer is set to the container "struct epitem" */ --- 11 unchanged lines hidden (view full) --- 222 223/* Wrapper struct used by poll queueing */ 224struct ep_pqueue { 225 poll_table pt; 226 struct epitem *epi; 227}; 228 229/* | 208}; 209 210/* Wait structure used by the poll hooks */ 211struct eppoll_entry { 212 /* List header used to link this structure to the "struct epitem" */ 213 struct list_head llink; 214 215 /* The "base" pointer is set to the container "struct epitem" */ --- 11 unchanged lines hidden (view full) --- 227 228/* Wrapper struct used by poll queueing */ 229struct ep_pqueue { 230 poll_table pt; 231 struct epitem *epi; 232}; 233 234/* |
235 * Configuration options available inside /proc/sys/fs/epoll/ 236 */ 237/* Maximum number of epoll devices, per user */ 238static int max_user_instances __read_mostly; 239/* Maximum number of epoll watched descriptors, per user */ 240static int max_user_watches __read_mostly; 241 242/* |
|
230 * This mutex is used to serialize ep_free() and eventpoll_release_file(). 231 */ | 243 * This mutex is used to serialize ep_free() and eventpoll_release_file(). 244 */ |
232static struct mutex epmutex; | 245static DEFINE_MUTEX(epmutex); |
233 234/* Safe wake up implementation */ 235static struct poll_safewake psw; 236 237/* Slab cache used to allocate "struct epitem" */ 238static struct kmem_cache *epi_cache __read_mostly; 239 240/* Slab cache used to allocate "struct eppoll_entry" */ 241static struct kmem_cache *pwq_cache __read_mostly; 242 | 246 247/* Safe wake up implementation */ 248static struct poll_safewake psw; 249 250/* Slab cache used to allocate "struct epitem" */ 251static struct kmem_cache *epi_cache __read_mostly; 252 253/* Slab cache used to allocate "struct eppoll_entry" */ 254static struct kmem_cache *pwq_cache __read_mostly; 255 |
256#ifdef CONFIG_SYSCTL |
|
243 | 257 |
258#include <linux/sysctl.h> 259 260static int zero; 261 262ctl_table epoll_table[] = { 263 { 264 .procname = "max_user_instances", 265 .data = &max_user_instances, 266 .maxlen = sizeof(int), 267 .mode = 0644, 268 .proc_handler = &proc_dointvec_minmax, 269 .extra1 = &zero, 270 }, 271 { 272 .procname = "max_user_watches", 273 .data = &max_user_watches, 274 .maxlen = sizeof(int), 275 .mode = 0644, 276 .proc_handler = &proc_dointvec_minmax, 277 .extra1 = &zero, 278 }, 279 { .ctl_name = 0 } 280}; 281#endif /* CONFIG_SYSCTL */ 282 283 |
|
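When CONFIG_SYSCTL is enabled, the two limits declared above become tunable at runtime through /proc/sys/fs/epoll/, named after the `.procname` fields in the table. A minimal user-space sketch for inspecting the current values (the paths assume a kernel carrying this patch; error handling is kept to the bare minimum):

```c
/* Sketch: read the per-user epoll limits exposed by the sysctl table above.
 * Paths follow the .procname fields and assume a kernel with CONFIG_SYSCTL
 * and this patch applied. */
#include <stdio.h>

static long read_limit(const char *path)
{
	FILE *f = fopen(path, "r");
	long val = -1;

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("max_user_instances = %ld\n",
	       read_limit("/proc/sys/fs/epoll/max_user_instances"));
	printf("max_user_watches   = %ld\n",
	       read_limit("/proc/sys/fs/epoll/max_user_watches"));
	return 0;
}
```

Both entries are writable (mode 0644) with `proc_dointvec_minmax` clamped at zero, so root can raise or lower them on a running system.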
244/* Setup the structure that is used as key for the RB tree */ 245static inline void ep_set_ffd(struct epoll_filefd *ffd, 246 struct file *file, int fd) 247{ 248 ffd->file = file; 249 ffd->fd = fd; 250} 251 --- 145 unchanged lines hidden (view full) --- 397 spin_lock_irqsave(&ep->lock, flags); 398 if (ep_is_linked(&epi->rdllink)) 399 list_del_init(&epi->rdllink); 400 spin_unlock_irqrestore(&ep->lock, flags); 401 402 /* At this point it is safe to free the eventpoll item */ 403 kmem_cache_free(epi_cache, epi); 404 | 284/* Setup the structure that is used as key for the RB tree */ 285static inline void ep_set_ffd(struct epoll_filefd *ffd, 286 struct file *file, int fd) 287{ 288 ffd->file = file; 289 ffd->fd = fd; 290} 291 --- 145 unchanged lines hidden (view full) --- 437 spin_lock_irqsave(&ep->lock, flags); 438 if (ep_is_linked(&epi->rdllink)) 439 list_del_init(&epi->rdllink); 440 spin_unlock_irqrestore(&ep->lock, flags); 441 442 /* At this point it is safe to free the eventpoll item */ 443 kmem_cache_free(epi_cache, epi); 444 |
445 atomic_dec(&ep->user->epoll_watches); 446 |
|
405 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", 406 current, ep, file)); 407 408 return 0; 409} 410 411static void ep_free(struct eventpoll *ep) 412{ --- 31 unchanged lines hidden (view full) --- 444 */ 445 while ((rbp = rb_first(&ep->rbr)) != NULL) { 446 epi = rb_entry(rbp, struct epitem, rbn); 447 ep_remove(ep, epi); 448 } 449 450 mutex_unlock(&epmutex); 451 mutex_destroy(&ep->mtx); | 447 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", 448 current, ep, file)); 449 450 return 0; 451} 452 453static void ep_free(struct eventpoll *ep) 454{ --- 31 unchanged lines hidden (view full) --- 486 */ 487 while ((rbp = rb_first(&ep->rbr)) != NULL) { 488 epi = rb_entry(rbp, struct epitem, rbn); 489 ep_remove(ep, epi); 490 } 491 492 mutex_unlock(&epmutex); 493 mutex_destroy(&ep->mtx); |
494 atomic_dec(&ep->user->epoll_devs); 495 free_uid(ep->user); |
|
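Teardown mirrors setup: ep_remove() drops the per-user watch count, and ep_free() drops the per-user instance count plus the uid reference taken in ep_alloc(). A stand-alone sketch of this check-then-count pattern, using C11 atomics as a stand-in for the kernel's atomic_t and a placeholder struct in place of user_struct (both assumptions for illustration):

```c
/* Stand-alone sketch of the per-user accounting pattern used by this patch:
 * an atomic counter checked against a limit on allocation and decremented on
 * teardown.  C11 atomics stand in for the kernel's atomic_t; "struct user"
 * is a placeholder for struct user_struct. */
#include <stdatomic.h>
#include <stdio.h>

struct user {
	atomic_int epoll_watches;
};

static int limit = 4;			/* stands in for max_user_watches */

static int watch_add(struct user *u)
{
	/* As in the kernel code, the check and the increment are not one
	 * atomic step; the limit is a soft cap, not an exact invariant. */
	if (atomic_load(&u->epoll_watches) >= limit)
		return -1;		/* the kernel returns -ENOSPC here */
	atomic_fetch_add(&u->epoll_watches, 1);
	return 0;
}

static void watch_remove(struct user *u)
{
	atomic_fetch_sub(&u->epoll_watches, 1);	/* mirrors ep_remove() */
}

int main(void)
{
	struct user u = { .epoll_watches = 0 };
	int i, ok = 0;

	for (i = 0; i < 6; i++)
		if (watch_add(&u) == 0)
			ok++;
	printf("added %d of 6 (limit %d)\n", ok, limit);	/* added 4 of 6 */
	watch_remove(&u);
	return 0;
}
```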
452 kfree(ep); 453} 454 455static int ep_eventpoll_release(struct inode *inode, struct file *file) 456{ 457 struct eventpoll *ep = file->private_data; 458 459 if (ep) --- 67 unchanged lines hidden (view full) --- 527 mutex_unlock(&ep->mtx); 528 } 529 530 mutex_unlock(&epmutex); 531} 532 533static int ep_alloc(struct eventpoll **pep) 534{ | 496 kfree(ep); 497} 498 499static int ep_eventpoll_release(struct inode *inode, struct file *file) 500{ 501 struct eventpoll *ep = file->private_data; 502 503 if (ep) --- 67 unchanged lines hidden (view full) --- 571 mutex_unlock(&ep->mtx); 572 } 573 574 mutex_unlock(&epmutex); 575} 576 577static int ep_alloc(struct eventpoll **pep) 578{ |
535 struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); | 579 int error; 580 struct user_struct *user; 581 struct eventpoll *ep; |
536 | 582 |
537 if (!ep) 538 return -ENOMEM; | 583 user = get_current_user(); 584 error = -EMFILE; 585 if (unlikely(atomic_read(&user->epoll_devs) >= 586 max_user_instances)) 587 goto free_uid; 588 error = -ENOMEM; 589 ep = kzalloc(sizeof(*ep), GFP_KERNEL); 590 if (unlikely(!ep)) 591 goto free_uid; |
539 540 spin_lock_init(&ep->lock); 541 mutex_init(&ep->mtx); 542 init_waitqueue_head(&ep->wq); 543 init_waitqueue_head(&ep->poll_wait); 544 INIT_LIST_HEAD(&ep->rdllist); 545 ep->rbr = RB_ROOT; 546 ep->ovflist = EP_UNACTIVE_PTR; | 592 593 spin_lock_init(&ep->lock); 594 mutex_init(&ep->mtx); 595 init_waitqueue_head(&ep->wq); 596 init_waitqueue_head(&ep->poll_wait); 597 INIT_LIST_HEAD(&ep->rdllist); 598 ep->rbr = RB_ROOT; 599 ep->ovflist = EP_UNACTIVE_PTR; |
600 ep->user = user; |
|
547 548 *pep = ep; 549 550 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", 551 current, ep)); 552 return 0; | 601 602 *pep = ep; 603 604 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", 605 current, ep)); 606 return 0; |
607 608free_uid: 609 free_uid(user); 610 return error; |
|
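Because ep_alloc() now charges every eventpoll descriptor to the creating user, epoll_create() starts failing with EMFILE once that user already holds max_user_instances of them. A rough user-space demonstration follows; note that a full per-process fd table also yields EMFILE, so in practice you may need to raise RLIMIT_NOFILE or lower the sysctl to be sure the epoll limit is what triggered:

```c
/* Sketch: keep creating epoll instances until the per-user limit enforced in
 * ep_alloc() is hit; at that point epoll_create() fails with EMFILE.
 * The fds are deliberately leaked for the duration of the demo. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
	int n = 0;

	for (;;) {
		int fd = epoll_create(1);

		if (fd < 0) {
			printf("epoll_create #%d failed: %s\n",
			       n + 1, strerror(errno));
			return errno == EMFILE ? 0 : 1;
		}
		n++;
	}
}
```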
553} 554 555/* 556 * Search the file inside the eventpoll tree. The RB tree operations 557 * are protected by the "mtx" mutex, and ep_find() must be called with 558 * "mtx" held. 559 */ 560static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) --- 137 unchanged lines hidden (view full) --- 698static int ep_insert(struct eventpoll *ep, struct epoll_event *event, 699 struct file *tfile, int fd) 700{ 701 int error, revents, pwake = 0; 702 unsigned long flags; 703 struct epitem *epi; 704 struct ep_pqueue epq; 705 | 611} 612 613/* 614 * Search the file inside the eventpoll tree. The RB tree operations 615 * are protected by the "mtx" mutex, and ep_find() must be called with 616 * "mtx" held. 617 */ 618static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) --- 137 unchanged lines hidden (view full) --- 756static int ep_insert(struct eventpoll *ep, struct epoll_event *event, 757 struct file *tfile, int fd) 758{ 759 int error, revents, pwake = 0; 760 unsigned long flags; 761 struct epitem *epi; 762 struct ep_pqueue epq; 763 |
706 error = -ENOMEM; | 764 if (unlikely(atomic_read(&ep->user->epoll_watches) >= 765 max_user_watches)) 766 return -ENOSPC; |
707 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) | 767 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) |
708 goto error_return; | 768 return -ENOMEM; |
709 710 /* Item initialization follow here ... */ 711 INIT_LIST_HEAD(&epi->rdllink); 712 INIT_LIST_HEAD(&epi->fllink); 713 INIT_LIST_HEAD(&epi->pwqlist); 714 epi->ep = ep; 715 ep_set_ffd(&epi->ffd, tfile, fd); 716 epi->event = *event; --- 13 unchanged lines hidden (view full) --- 730 */ 731 revents = tfile->f_op->poll(tfile, &epq.pt); 732 733 /* 734 * We have to check if something went wrong during the poll wait queue 735 * install process. Namely an allocation for a wait queue failed due 736 * high memory pressure. 737 */ | 769 770 /* Item initialization follow here ... */ 771 INIT_LIST_HEAD(&epi->rdllink); 772 INIT_LIST_HEAD(&epi->fllink); 773 INIT_LIST_HEAD(&epi->pwqlist); 774 epi->ep = ep; 775 ep_set_ffd(&epi->ffd, tfile, fd); 776 epi->event = *event; --- 13 unchanged lines hidden (view full) --- 790 */ 791 revents = tfile->f_op->poll(tfile, &epq.pt); 792 793 /* 794 * We have to check if something went wrong during the poll wait queue 795 * install process. Namely an allocation for a wait queue failed due 796 * high memory pressure. 797 */ |
798 error = -ENOMEM; |
|
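The check added at the top of ep_insert() means EPOLL_CTL_ADD can now fail with ENOSPC once the calling user owns max_user_watches epitems. A hedged sketch of how a caller might react (the helper name and reporting are illustrative, not part of any API):

```c
/* Sketch: EPOLL_CTL_ADD returns ENOSPC when the per-user watch limit
 * enforced by ep_insert() has been reached. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>

static int add_watch(int epfd, int fd)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };

	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
		if (errno == ENOSPC)
			fprintf(stderr, "per-user epoll watch limit reached "
				"(see /proc/sys/fs/epoll/max_user_watches)\n");
		else
			fprintf(stderr, "EPOLL_CTL_ADD: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}

int main(void)
{
	int epfd = epoll_create(1);
	int pfd[2];

	if (epfd < 0 || pipe(pfd) < 0)
		return 1;
	return add_watch(epfd, pfd[0]) ? 1 : 0;
}
```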
738 if (epi->nwait < 0) 739 goto error_unregister; 740 741 /* Add the current item to the list of active epoll hook for this file */ 742 spin_lock(&tfile->f_ep_lock); 743 list_add_tail(&epi->fllink, &tfile->f_ep_links); 744 spin_unlock(&tfile->f_ep_lock); 745 --- 14 unchanged lines hidden (view full) --- 760 if (waitqueue_active(&ep->wq)) 761 wake_up_locked(&ep->wq); 762 if (waitqueue_active(&ep->poll_wait)) 763 pwake++; 764 } 765 766 spin_unlock_irqrestore(&ep->lock, flags); 767 | 799 if (epi->nwait < 0) 800 goto error_unregister; 801 802 /* Add the current item to the list of active epoll hook for this file */ 803 spin_lock(&tfile->f_ep_lock); 804 list_add_tail(&epi->fllink, &tfile->f_ep_links); 805 spin_unlock(&tfile->f_ep_lock); 806 --- 14 unchanged lines hidden (view full) --- 821 if (waitqueue_active(&ep->wq)) 822 wake_up_locked(&ep->wq); 823 if (waitqueue_active(&ep->poll_wait)) 824 pwake++; 825 } 826 827 spin_unlock_irqrestore(&ep->lock, flags); 828 |
829 atomic_inc(&ep->user->epoll_watches); 830 |
|
768 /* We have to call this outside the lock */ 769 if (pwake) 770 ep_poll_safewake(&psw, &ep->poll_wait); 771 772 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n", 773 current, ep, tfile, fd)); 774 775 return 0; --- 8 unchanged lines hidden (view full) --- 784 * And ep_insert() is called with "mtx" held. 785 */ 786 spin_lock_irqsave(&ep->lock, flags); 787 if (ep_is_linked(&epi->rdllink)) 788 list_del_init(&epi->rdllink); 789 spin_unlock_irqrestore(&ep->lock, flags); 790 791 kmem_cache_free(epi_cache, epi); | 831 /* We have to call this outside the lock */ 832 if (pwake) 833 ep_poll_safewake(&psw, &ep->poll_wait); 834 835 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n", 836 current, ep, tfile, fd)); 837 838 return 0; --- 8 unchanged lines hidden (view full) --- 847 * And ep_insert() is called with "mtx" held. 848 */ 849 spin_lock_irqsave(&ep->lock, flags); 850 if (ep_is_linked(&epi->rdllink)) 851 list_del_init(&epi->rdllink); 852 spin_unlock_irqrestore(&ep->lock, flags); 853 854 kmem_cache_free(epi_cache, epi); |
792error_return: | 855 |
793 return error; 794} 795 796/* 797 * Modify the interest event mask by dropping an event if the new mask 798 * has a match in the current file status. Must be called with "mtx" held. 799 */ 800static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) --- 272 unchanged lines hidden (view full) --- 1073 /* 1074 * Creates all the items needed to setup an eventpoll file. That is, 1075 * a file structure and a free file descriptor. 1076 */ 1077 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1078 flags & O_CLOEXEC); 1079 if (fd < 0) 1080 ep_free(ep); | 856 return error; 857} 858 859/* 860 * Modify the interest event mask by dropping an event if the new mask 861 * has a match in the current file status. Must be called with "mtx" held. 862 */ 863static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) --- 272 unchanged lines hidden (view full) --- 1136 /* 1137 * Creates all the items needed to setup an eventpoll file. That is, 1138 * a file structure and a free file descriptor. 1139 */ 1140 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1141 flags & O_CLOEXEC); 1142 if (fd < 0) 1143 ep_free(ep); |
1144 atomic_inc(&ep->user->epoll_devs); |
|
1081 1082error_return: 1083 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1084 current, flags, fd)); 1085 1086 return fd; 1087} 1088 --- 205 unchanged lines hidden (view full) --- 1294 1295 return error; 1296} 1297 1298#endif /* HAVE_SET_RESTORE_SIGMASK */ 1299 1300static int __init eventpoll_init(void) 1301{ | 1145 1146error_return: 1147 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1148 current, flags, fd)); 1149 1150 return fd; 1151} 1152 --- 205 unchanged lines hidden (view full) --- 1358 1359 return error; 1360} 1361 1362#endif /* HAVE_SET_RESTORE_SIGMASK */ 1363 1364static int __init eventpoll_init(void) 1365{ |
1302 mutex_init(&epmutex); | 1366 struct sysinfo si; |
1303 | 1367 |
1368 si_meminfo(&si); 1369 max_user_instances = 128; 1370 max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) / 1371 EP_ITEM_COST; 1372 |
|
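eventpoll_init() now sizes max_user_watches from low memory: 1/32 of the non-highmem pages, converted to bytes, divided by EP_ITEM_COST (one epitem plus one eppoll_entry per watch). A worked sketch of that arithmetic, assuming 1 GiB of low memory, 4 KiB pages and an EP_ITEM_COST of roughly 128 bytes (the real struct sizes depend on architecture and kernel configuration):

```c
/* Worked sketch of the default max_user_watches computation performed in
 * eventpoll_init().  All concrete numbers are assumptions for illustration:
 * 1 GiB of low memory, PAGE_SHIFT = 12, EP_ITEM_COST ~= 128 bytes. */
#include <stdio.h>

int main(void)
{
	const unsigned long page_shift = 12;			/* 4 KiB pages */
	const unsigned long lowmem_pages = (1UL << 30) >> page_shift;
	const unsigned long ep_item_cost = 128;

	/* Mirrors:
	 * max_user_watches = (((si.totalram - si.totalhigh) / 32)
	 *                     << PAGE_SHIFT) / EP_ITEM_COST;      */
	unsigned long max_user_watches =
		((lowmem_pages / 32) << page_shift) / ep_item_cost;

	printf("max_user_watches ~= %lu\n", max_user_watches);	/* ~262144 */
	return 0;
}
```

In other words, with these assumed numbers a single user may pin at most about 32 MiB of low memory in epoll watches; max_user_instances is simply fixed at 128.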
1304 /* Initialize the structure used to perform safe poll wait head wake ups */ 1305 ep_poll_safewake_init(&psw); 1306 1307 /* Allocates slab cache used to allocate "struct epitem" items */ 1308 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 1309 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, 1310 NULL); 1311 1312 /* Allocates slab cache used to allocate "struct eppoll_entry" */ 1313 pwq_cache = kmem_cache_create("eventpoll_pwq", 1314 sizeof(struct eppoll_entry), 0, 1315 EPI_SLAB_DEBUG|SLAB_PANIC, NULL); 1316 1317 return 0; 1318} 1319fs_initcall(eventpoll_init); | 1373 /* Initialize the structure used to perform safe poll wait head wake ups */ 1374 ep_poll_safewake_init(&psw); 1375 1376 /* Allocates slab cache used to allocate "struct epitem" items */ 1377 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 1378 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, 1379 NULL); 1380 1381 /* Allocates slab cache used to allocate "struct eppoll_entry" */ 1382 pwq_cache = kmem_cache_create("eventpoll_pwq", 1383 sizeof(struct eppoll_entry), 0, 1384 EPI_SLAB_DEBUG|SLAB_PANIC, NULL); 1385 1386 return 0; 1387} 1388fs_initcall(eventpoll_init); |