1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * epoll(7) file descriptor monitoring
4 */
5
6 #include "qemu/osdep.h"
7 #include <sys/epoll.h>
8 #include "qemu/lockcnt.h"
9 #include "qemu/rcu_queue.h"
10 #include "aio-posix.h"
11
/* Minimum number of monitored fds at which we switch from poll to epoll */
13 #define EPOLL_ENABLE_THRESHOLD 64
14
/*
 * Stop using epoll for @ctx.
 *
 * Closes the epoll file descriptor if one is open, marks it as absent (-1)
 * and makes fdmon_poll_ops the active fd monitoring implementation.  The
 * epollfd check makes repeated calls harmless.
 */
void fdmon_epoll_disable(AioContext *ctx)
{
    int fd = ctx->epollfd;

    if (fd >= 0) {
        close(fd);
        ctx->epollfd = -1;
    }

    /* From now on fd monitoring goes through the poll implementation */
    ctx->fdmon_ops = &fdmon_poll_ops;
}
25
epoll_events_from_pfd(int pfd_events)26 static inline int epoll_events_from_pfd(int pfd_events)
27 {
28 return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
29 (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
30 (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
31 (pfd_events & G_IO_ERR ? EPOLLERR : 0);
32 }
33
/*
 * Bring the epoll interest list in line with a handler change.
 *
 * @old_node: handler being replaced or removed, NULL when adding
 * @new_node: handler being installed, NULL when removing
 *
 *   new_node == NULL                  -> EPOLL_CTL_DEL on old_node's fd
 *   old_node == NULL                  -> EPOLL_CTL_ADD on new_node's fd
 *   both non-NULL                     -> EPOLL_CTL_MOD on new_node's fd
 *
 * If epoll_ctl() reports an error, epoll is disabled for @ctx via
 * fdmon_epoll_disable().
 */
static void fdmon_epoll_update(AioContext *ctx,
                               AioHandler *old_node,
                               AioHandler *new_node)
{
    struct epoll_event event = {
        .events = 0,
        .data.ptr = new_node,
    };
    int op;
    int fd;

    if (new_node) {
        event.events = epoll_events_from_pfd(new_node->pfd.events);
        fd = new_node->pfd.fd;
        op = old_node ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
    } else {
        /* Removal: the event carries no mask and a NULL data pointer */
        fd = old_node->pfd.fd;
        op = EPOLL_CTL_DEL;
    }

    if (epoll_ctl(ctx->epollfd, op, fd, &event) != 0) {
        fdmon_epoll_disable(ctx);
    }
}
56
/*
 * Wait for activity on the fds registered with @ctx's epoll instance and
 * append every handler that fired to @ready_list.
 *
 * @timeout: when positive, the wait itself is done with qemu_poll_ns() on
 *           the epoll fd and the events are then fetched with a zero-timeout
 *           epoll_wait(); otherwise @timeout is handed straight to
 *           epoll_wait().
 *           NOTE(review): presumably the two-step wait exists because
 *           qemu_poll_ns() takes a finer-grained timeout than epoll_wait()'s
 *           milliseconds - confirm against qemu_poll_ns().
 *
 * Returns the qemu_poll_ns() result if it was <= 0, otherwise the
 * epoll_wait() result (number of events collected, 0, or a negative value on
 * error).
 */
static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
                            int64_t timeout)
{
    struct epoll_event events[128];
    int nready;
    int idx;

    if (timeout > 0) {
        GPollFD pfd = {
            .fd = ctx->epollfd,
            .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
        };

        nready = qemu_poll_ns(&pfd, 1, timeout);
        if (nready <= 0) {
            /* Timed out or failed: nothing to collect */
            return nready;
        }

        /* The epoll fd is readable, so fetch the events without blocking */
        timeout = 0;
    }

    nready = epoll_wait(ctx->epollfd, events, ARRAY_SIZE(events), timeout);
    if (nready <= 0) {
        return nready;
    }

    for (idx = 0; idx < nready; idx++) {
        AioHandler *handler = events[idx].data.ptr;
        int ev = events[idx].events;
        int revents = 0;

        /* Map the epoll result bits back onto GPollFD-style bits */
        if (ev & EPOLLIN) {
            revents |= G_IO_IN;
        }
        if (ev & EPOLLOUT) {
            revents |= G_IO_OUT;
        }
        if (ev & EPOLLHUP) {
            revents |= G_IO_HUP;
        }
        if (ev & EPOLLERR) {
            revents |= G_IO_ERR;
        }

        aio_add_ready_handler(ready_list, handler, revents);
    }

    return nready;
}
95
96 static const FDMonOps fdmon_epoll_ops = {
97 .update = fdmon_epoll_update,
98 .wait = fdmon_epoll_wait,
99 .need_wait = aio_poll_disabled,
100 };
101
/*
 * Register every live handler of @ctx with the epoll instance and, if all
 * registrations succeed, make fdmon_epoll_ops the active implementation.
 *
 * Handlers that are queued on the deleted list or that have an empty event
 * mask are skipped.
 *
 * Returns true on success.  Returns false as soon as one epoll_ctl() call
 * fails; fdmon_ops is left untouched in that case and this function does not
 * undo the registrations already made.
 */
static bool fdmon_epoll_try_enable(AioContext *ctx)
{
    AioHandler *handler;

    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        if (!QLIST_IS_INSERTED(handler, node_deleted) && handler->pfd.events) {
            struct epoll_event event = {
                .events = epoll_events_from_pfd(handler->pfd.events),
                .data.ptr = handler,
            };

            if (epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD,
                          handler->pfd.fd, &event) != 0) {
                return false;
            }
        }
    }

    ctx->fdmon_ops = &fdmon_epoll_ops;
    return true;
}
123
/*
 * Try to switch @ctx over to epoll-based fd monitoring.
 *
 * @npfd: number of file descriptors currently being monitored
 *
 * Nothing is attempted, and false is returned, when there is no epoll
 * instance (epollfd < 0), when @npfd is below EPOLL_ENABLE_THRESHOLD, or
 * when qemu_lockcnt_dec_if_lock() does not obtain ctx->list_lock.
 *
 * If registering the handlers with epoll fails, epoll is disabled for @ctx
 * via fdmon_epoll_disable() and false is returned.  Returns true when epoll
 * is now in use.
 */
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
{
    bool enabled;

    if (ctx->epollfd < 0 || npfd < EPOLL_ENABLE_THRESHOLD) {
        return false;
    }

    /* The handler list has to stay stable while its fds are added to epoll */
    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        return false;
    }

    enabled = fdmon_epoll_try_enable(ctx);

    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);

    if (enabled) {
        return true;
    }

    fdmon_epoll_disable(ctx);
    return false;
}
150
/*
 * Create the epoll instance for @ctx.
 *
 * The descriptor is opened with EPOLL_CLOEXEC and stored in ctx->epollfd.
 * Failure is not fatal: a diagnostic is written to stderr and ctx->epollfd
 * keeps the -1 returned by epoll_create1(), which fdmon_epoll_try_upgrade()
 * treats as "epoll unavailable".
 */
void fdmon_epoll_setup(AioContext *ctx)
{
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        /* Terminate the line so later stderr output does not run into it */
        fprintf(stderr, "Failed to create epoll instance: %s\n",
                strerror(errno));
    }
}
158