/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * poll(2) file descriptor monitoring
 *
 * Uses ppoll(2) when available, g_poll() otherwise.
 */
71f050a46SStefan Hajnoczi
81f050a46SStefan Hajnoczi #include "qemu/osdep.h"
91f050a46SStefan Hajnoczi #include "aio-posix.h"
101f050a46SStefan Hajnoczi #include "qemu/rcu_queue.h"
111f050a46SStefan Hajnoczi
/*
 * These thread-local variables are used only in fdmon_poll_wait() around the
 * call to the poll() system call.  In particular they are not used while
 * aio_poll() is performing callbacks, which makes it much easier to think
 * about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
241f050a46SStefan Hajnoczi static __thread GPollFD *pollfds;
251f050a46SStefan Hajnoczi static __thread AioHandler **nodes;
261f050a46SStefan Hajnoczi static __thread unsigned npfd, nalloc;
271f050a46SStefan Hajnoczi static __thread Notifier pollfds_cleanup_notifier;
281f050a46SStefan Hajnoczi
/*
 * Release this thread's pollfds[] and nodes[] arrays.
 *
 * Installed by add_pollfd() as the callback of pollfds_cleanup_notifier,
 * which is registered through qemu_thread_atexit_add().
 *
 * @n: the notifier that fired (not used)
 * @unused: notifier payload (not used)
 *
 * Must not run while an array is being filled: npfd has to be 0.
 */
static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);

    g_free(pollfds);
    g_free(nodes);

    /*
     * add_pollfd() treats nalloc == 0 as "nothing allocated yet" and hands
     * the current pointers to g_renew().  Clear them together with nalloc so
     * that a later call starts from NULL instead of reallocating freed memory.
     */
    pollfds = NULL;
    nodes = NULL;
    nalloc = 0;
}
361f050a46SStefan Hajnoczi
/*
 * Append @node to the thread-local pollfds[] and nodes[] arrays.
 *
 * When the arrays are full they are grown first: the initial allocation holds
 * 8 entries (and registers the cleanup notifier), and later growth doubles
 * the capacity.  The new pollfds[] entry copies fd and events from
 * node->pfd; the remaining GPollFD fields are zero-initialized by the
 * compound literal.
 */
static void add_pollfd(AioHandler *node)
{
    GPollFD *slot;

    if (npfd == nalloc) {
        if (nalloc != 0) {
            /* Keep the doubled capacity representable */
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        } else {
            /* First use in this thread: arrange for the arrays to be freed */
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        }

        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }

    nodes[npfd] = node;

    slot = &pollfds[npfd];
    *slot = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };

    npfd++;
}
581f050a46SStefan Hajnoczi
/*
 * Wait for activity on the handlers of @ctx using qemu_poll_ns().
 *
 * @ctx: context whose aio_handlers list is scanned
 * @ready_list: receives every handler that reported events
 * @timeout: passed through unchanged to qemu_poll_ns() (or to the
 *           replacement wait implementation after an epoll upgrade)
 *
 * Returns the value of qemu_poll_ns(), or the result of
 * ctx->fdmon_ops->wait() when fdmon_epoll_try_upgrade() succeeded.
 *
 * npfd must be 0 on entry and is reset to 0 on every return path.
 */
static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t timeout)
{
    AioHandler *handler;
    unsigned idx;
    int nready;

    assert(npfd == 0);

    /* Collect every handler that is not deleted and has events to watch */
    QLIST_FOREACH_RCU(handler, &ctx->aio_handlers, node) {
        if (QLIST_IS_INSERTED(handler, node_deleted) ||
            !handler->pfd.events) {
            continue;
        }
        add_pollfd(handler);
    }

    /* epoll(7) is faster above a certain number of fds */
    if (fdmon_epoll_try_upgrade(ctx, npfd)) {
        /* pollfds[] will not be needed, so drop its contents first */
        npfd = 0;
        return ctx->fdmon_ops->wait(ctx, ready_list, timeout);
    }

    nready = qemu_poll_ns(pollfds, npfd, timeout);
    if (nready > 0) {
        for (idx = 0; idx < npfd; idx++) {
            int revents = pollfds[idx].revents;

            if (!revents) {
                continue;
            }
            aio_add_ready_handler(ready_list, nodes[idx], revents);
        }
    }

    npfd = 0;
    return nready;
}
951f050a46SStefan Hajnoczi
/*
 * Handler add/modify/remove hook for the poll() monitor.
 *
 * Intentionally empty: fdmon_poll_wait() rebuilds pollfds[] from the
 * AioHandler list on every call, so there is no separate state to update.
 *
 * @ctx: not used
 * @old_node: not used
 * @new_node: not used
 */
static void fdmon_poll_update(AioContext *ctx,
                              AioHandler *old_node,
                              AioHandler *new_node)
{
    /* Nothing to do, AioHandler already holds the state we will need */
}
1021f050a46SStefan Hajnoczi
1031f050a46SStefan Hajnoczi const FDMonOps fdmon_poll_ops = {
1041f050a46SStefan Hajnoczi .update = fdmon_poll_update,
1051f050a46SStefan Hajnoczi .wait = fdmon_poll_wait,
106aa38e19fSStefan Hajnoczi .need_wait = aio_poll_disabled,
1071f050a46SStefan Hajnoczi };
108