xref: /openbmc/qemu/util/main-loop.c (revision dc8d6cf2033c813ade9863a926f2d71a22edd249)
1  /*
2   * QEMU System Emulator
3   *
4   * Copyright (c) 2003-2008 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  #include "qapi/error.h"
27  #include "qemu/cutils.h"
28  #include "qemu/timer.h"
29  #include "sysemu/cpu-timers.h"
30  #include "sysemu/replay.h"
31  #include "qemu/main-loop.h"
32  #include "block/aio.h"
33  #include "block/thread-pool.h"
34  #include "qemu/error-report.h"
35  #include "qemu/queue.h"
36  #include "qom/object.h"
37  
38  #ifndef _WIN32
39  #include <sys/wait.h>
40  #endif
41  
42  #ifndef _WIN32
43  
44  /* If we have signalfd, we mask out the signals we want to handle and then
45   * use signalfd to listen for them.  We rely on whatever the current signal
46   * handler is to dispatch the signals when we receive them.
47   */
48  /*
49   * Disable CFI checks.
50   * We are going to call a signal hander directly. Such handler may or may not
51   * have been defined in our binary, so there's no guarantee that the pointer
52   * used to set the handler is a cfi-valid pointer. Since the handlers are
53   * stored in kernel memory, changing the handler to an attacker-defined
54   * function requires being able to call a sigaction() syscall,
55   * which is not as easy as overwriting a pointer in memory.
56   */
57  QEMU_DISABLE_CFI
58  static void sigfd_handler(void *opaque)
59  {
60      int fd = (intptr_t)opaque;
61      struct qemu_signalfd_siginfo info;
62      struct sigaction action;
63      ssize_t len;
64  
65      while (1) {
66          len = RETRY_ON_EINTR(read(fd, &info, sizeof(info)));
67  
68          if (len == -1 && errno == EAGAIN) {
69              break;
70          }
71  
72          if (len != sizeof(info)) {
73              error_report("read from sigfd returned %zd: %s", len,
74                           g_strerror(errno));
75              return;
76          }
77  
78          sigaction(info.ssi_signo, NULL, &action);
79          if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
80              sigaction_invoke(&action, &info);
81          } else if (action.sa_handler) {
82              action.sa_handler(info.ssi_signo);
83          }
84      }
85  }
86  
87  static int qemu_signal_init(Error **errp)
88  {
89      int sigfd;
90      sigset_t set;
91  
92      /*
93       * SIG_IPI must be blocked in the main thread and must not be caught
94       * by sigwait() in the signal thread. Otherwise, the cpu thread will
95       * not catch it reliably.
96       */
97      sigemptyset(&set);
98      sigaddset(&set, SIG_IPI);
99      sigaddset(&set, SIGIO);
100      sigaddset(&set, SIGALRM);
101      sigaddset(&set, SIGBUS);
102      /* SIGINT cannot be handled via signalfd, so that ^C can be used
103       * to interrupt QEMU when it is being run under gdb.  SIGHUP and
104       * SIGTERM are also handled asynchronously, even though it is not
105       * strictly necessary, because they use the same handler as SIGINT.
106       */
107      pthread_sigmask(SIG_BLOCK, &set, NULL);
108  
109      sigdelset(&set, SIG_IPI);
110      sigfd = qemu_signalfd(&set);
111      if (sigfd == -1) {
112          error_setg_errno(errp, errno, "failed to create signalfd");
113          return -errno;
114      }
115  
116      g_unix_set_fd_nonblocking(sigfd, true, NULL);
117  
118      qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
119  
120      return 0;
121  }
122  
123  #else /* _WIN32 */
124  
/*
 * Windows build of qemu_signal_init(): no signal setup is performed.
 * @errp is never written.  Always returns 0.
 */
static int qemu_signal_init(Error **errp)
{
    return 0;
}
129  #endif
130  
/* AioContext created by qemu_init_main_loop(); NULL until then. */
static AioContext *qemu_aio_context;
/* Bottom half scheduled by qemu_notify_event(); its callback is a no-op. */
static QEMUBH *qemu_notify_bh;
133  
/*
 * Callback of qemu_notify_bh.  Intentionally empty.
 * @opaque: unused.
 */
static void notify_event_cb(void *opaque)
{
    /* No need to do anything; this bottom half is only used to
     * kick the kernel out of ppoll/poll/WaitForMultipleObjects.
     */
}
140  
141  AioContext *qemu_get_aio_context(void)
142  {
143      return qemu_aio_context;
144  }
145  
146  void qemu_notify_event(void)
147  {
148      if (!qemu_aio_context) {
149          return;
150      }
151      qemu_bh_schedule(qemu_notify_bh);
152  }
153  
/*
 * Array of GPollFD allocated by qemu_init_main_loop() and emptied at the
 * start of every main_loop_wait() call.
 */
static GArray *gpollfds;
155  
156  int qemu_init_main_loop(Error **errp)
157  {
158      int ret;
159      GSource *src;
160  
161      init_clocks(qemu_timer_notify_cb);
162  
163      ret = qemu_signal_init(errp);
164      if (ret) {
165          return ret;
166      }
167  
168      qemu_aio_context = aio_context_new(errp);
169      if (!qemu_aio_context) {
170          return -EMFILE;
171      }
172      qemu_set_current_aio_context(qemu_aio_context);
173      qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
174      gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
175      src = aio_get_g_source(qemu_aio_context);
176      g_source_set_name(src, "aio-context");
177      g_source_attach(src, NULL);
178      g_source_unref(src);
179      src = iohandler_get_g_source();
180      g_source_set_name(src, "io-handler");
181      g_source_attach(src, NULL);
182      g_source_unref(src);
183      return 0;
184  }
185  
186  static void main_loop_update_params(EventLoopBase *base, Error **errp)
187  {
188      ERRP_GUARD();
189  
190      if (!qemu_aio_context) {
191          error_setg(errp, "qemu aio context not ready");
192          return;
193      }
194  
195      aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
196      if (*errp) {
197          return;
198      }
199  
200      aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
201                                         base->thread_pool_max, errp);
202  }
203  
/* The single MainLoop instance, assigned by main_loop_init(); NULL before. */
MainLoop *mloop;
205  
206  static void main_loop_init(EventLoopBase *base, Error **errp)
207  {
208      MainLoop *m = MAIN_LOOP(base);
209  
210      if (mloop) {
211          error_setg(errp, "only one main-loop instance allowed");
212          return;
213      }
214  
215      main_loop_update_params(base, errp);
216  
217      mloop = m;
218      return;
219  }
220  
/*
 * EventLoopBaseClass::can_be_deleted hook for the main-loop type.
 * Always returns false; @base is not inspected.
 */
static bool main_loop_can_be_deleted(EventLoopBase *base)
{
    return false;
}
225  
226  static void main_loop_class_init(ObjectClass *oc, void *class_data)
227  {
228      EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
229  
230      bc->init = main_loop_init;
231      bc->update_params = main_loop_update_params;
232      bc->can_be_deleted = main_loop_can_be_deleted;
233  }
234  
235  static const TypeInfo main_loop_info = {
236      .name = TYPE_MAIN_LOOP,
237      .parent = TYPE_EVENT_LOOP_BASE,
238      .class_init = main_loop_class_init,
239      .instance_size = sizeof(MainLoop),
240  };
241  
242  static void main_loop_register_types(void)
243  {
244      type_register_static(&main_loop_info);
245  }
246  
247  type_init(main_loop_register_types)
248  
/*
 * Priority written by g_main_context_prepare() and handed back to
 * g_main_context_query() and g_main_context_check().
 */
static int max_priority;
250  
251  #ifndef _WIN32
/* Index in gpollfds of the first slot handed to GLib by glib_pollfds_fill(). */
static int glib_pollfds_idx;
/* Number of gpollfds slots reserved for GLib, kept across iterations. */
static int glib_n_poll_fds;
254  
/* Non-Windows build: nothing to do, @fd is ignored. */
void qemu_fd_register(int fd)
{
}
258  
259  static void glib_pollfds_fill(int64_t *cur_timeout)
260  {
261      GMainContext *context = g_main_context_default();
262      int timeout = 0;
263      int64_t timeout_ns;
264      int n;
265  
266      g_main_context_prepare(context, &max_priority);
267  
268      glib_pollfds_idx = gpollfds->len;
269      n = glib_n_poll_fds;
270      do {
271          GPollFD *pfds;
272          glib_n_poll_fds = n;
273          g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
274          pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
275          n = g_main_context_query(context, max_priority, &timeout, pfds,
276                                   glib_n_poll_fds);
277      } while (n != glib_n_poll_fds);
278  
279      if (timeout < 0) {
280          timeout_ns = -1;
281      } else {
282          timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
283      }
284  
285      *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
286  }
287  
288  static void glib_pollfds_poll(void)
289  {
290      GMainContext *context = g_main_context_default();
291      GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
292  
293      if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
294          g_main_context_dispatch(context);
295      }
296  }
297  
/* NOTE(review): not referenced anywhere in the visible source. */
#define MAX_MAIN_LOOP_SPIN (1000)
299  
300  static int os_host_main_loop_wait(int64_t timeout)
301  {
302      GMainContext *context = g_main_context_default();
303      int ret;
304  
305      g_main_context_acquire(context);
306  
307      glib_pollfds_fill(&timeout);
308  
309      qemu_mutex_unlock_iothread();
310      replay_mutex_unlock();
311  
312      ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
313  
314      replay_mutex_lock();
315      qemu_mutex_lock_iothread();
316  
317      glib_pollfds_poll();
318  
319      g_main_context_release(context);
320  
321      return ret;
322  }
323  #else
324  /***********************************************************/
325  /* Polling handling */
326  
/* One registered polling callback; entries form a singly linked list. */
typedef struct PollingEntry {
    PollingFunc *func;          /* callback to invoke */
    void *opaque;               /* argument passed to func */
    struct PollingEntry *next;  /* next entry, or NULL at the tail */
} PollingEntry;

/* Head of the polling callback list; NULL when the list is empty. */
static PollingEntry *first_polling_entry;
334  
335  int qemu_add_polling_cb(PollingFunc *func, void *opaque)
336  {
337      PollingEntry **ppe, *pe;
338      pe = g_new0(PollingEntry, 1);
339      pe->func = func;
340      pe->opaque = opaque;
341      for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
342      *ppe = pe;
343      return 0;
344  }
345  
346  void qemu_del_polling_cb(PollingFunc *func, void *opaque)
347  {
348      PollingEntry **ppe, *pe;
349      for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
350          pe = *ppe;
351          if (pe->func == func && pe->opaque == opaque) {
352              *ppe = pe->next;
353              g_free(pe);
354              break;
355          }
356      }
357  }
358  
359  /***********************************************************/
360  /* Wait objects support */
/*
 * Registered wait objects, kept as parallel arrays: entries 0..num-1 of
 * each array describe the same object.
 */
typedef struct WaitObjects {
    int num;                                    /* entries in use */
    int revents[MAXIMUM_WAIT_OBJECTS];          /* events reported by the last poll */
    HANDLE events[MAXIMUM_WAIT_OBJECTS];        /* handle to wait on */
    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS]; /* callback, may be NULL */
    void *opaque[MAXIMUM_WAIT_OBJECTS];         /* argument passed to func */
} WaitObjects;

/* The single table of wait objects, initially empty. */
static WaitObjects wait_objects = {0};
370  
371  int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
372  {
373      int i;
374      WaitObjects *w = &wait_objects;
375  
376      if (w->num >= MAXIMUM_WAIT_OBJECTS) {
377          return -1;
378      }
379  
380      for (i = 0; i < w->num; i++) {
381          /* check if the same handle is added twice */
382          if (w->events[i] == handle) {
383              return -1;
384          }
385      }
386  
387      w->events[w->num] = handle;
388      w->func[w->num] = func;
389      w->opaque[w->num] = opaque;
390      w->revents[w->num] = 0;
391      w->num++;
392      return 0;
393  }
394  
395  void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
396  {
397      int i, found;
398      WaitObjects *w = &wait_objects;
399  
400      found = 0;
401      for (i = 0; i < w->num; i++) {
402          if (w->events[i] == handle) {
403              found = 1;
404          }
405          if (found && i < (MAXIMUM_WAIT_OBJECTS - 1)) {
406              w->events[i] = w->events[i + 1];
407              w->func[i] = w->func[i + 1];
408              w->opaque[i] = w->opaque[i + 1];
409              w->revents[i] = w->revents[i + 1];
410          }
411      }
412      if (found) {
413          w->num--;
414      }
415  }
416  
417  void qemu_fd_register(int fd)
418  {
419      WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
420                     FD_READ | FD_ACCEPT | FD_CLOSE |
421                     FD_CONNECT | FD_WRITE | FD_OOB);
422  }
423  
424  static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
425                          fd_set *xfds)
426  {
427      int nfds = -1;
428      int i;
429  
430      for (i = 0; i < pollfds->len; i++) {
431          GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
432          int fd = pfd->fd;
433          int events = pfd->events;
434          if (events & G_IO_IN) {
435              FD_SET(fd, rfds);
436              nfds = MAX(nfds, fd);
437          }
438          if (events & G_IO_OUT) {
439              FD_SET(fd, wfds);
440              nfds = MAX(nfds, fd);
441          }
442          if (events & G_IO_PRI) {
443              FD_SET(fd, xfds);
444              nfds = MAX(nfds, fd);
445          }
446      }
447      return nfds;
448  }
449  
450  static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
451                           fd_set *wfds, fd_set *xfds)
452  {
453      int i;
454  
455      for (i = 0; i < pollfds->len; i++) {
456          GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
457          int fd = pfd->fd;
458          int revents = 0;
459  
460          if (FD_ISSET(fd, rfds)) {
461              revents |= G_IO_IN;
462          }
463          if (FD_ISSET(fd, wfds)) {
464              revents |= G_IO_OUT;
465          }
466          if (FD_ISSET(fd, xfds)) {
467              revents |= G_IO_PRI;
468          }
469          pfd->revents = revents & pfd->events;
470      }
471  }
472  
/*
 * os_host_main_loop_wait (Windows): run one poll cycle.
 *
 * @timeout: upper bound on the wait, in nanoseconds.
 *
 * Steps, in order: run the registered polling callbacks; probe the sockets
 * in gpollfds with select(); query the GLib context for its descriptors;
 * append the registered wait objects; poll with the mutexes dropped; run
 * wait object callbacks; dispatch GLib sources.
 *
 * Returns the OR of the polling callbacks' results if that is non-zero,
 * otherwise 1 if either select() or qemu_poll_ns() returned non-zero and
 * 0 if both returned zero.
 */
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
    int select_ret = 0;
    int g_poll_ret, ret, i, n_poll_fds;
    PollingEntry *pe;
    WaitObjects *w = &wait_objects;
    gint poll_timeout;
    int64_t poll_timeout_ns;
    /* Static, hence zero-initialised: select() below gets a zero timeout. */
    static struct timeval tv0;
    fd_set rfds, wfds, xfds;
    int nfds;

    g_main_context_acquire(context);

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    /* A polling callback reported something: return without waiting. */
    if (ret != 0) {
        g_main_context_release(context);
        return ret;
    }

    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        /* Activity or an error on a socket: do not block in the poll below. */
        if (select_ret != 0) {
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

    g_main_context_prepare(context, &max_priority);
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
                                      poll_fds, ARRAY_SIZE(poll_fds));
    /* The GLib descriptors and the wait objects must both fit in poll_fds. */
    g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));

    /* Append the wait object handles after the GLib descriptors. */
    for (i = 0; i < w->num; i++) {
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
        poll_fds[n_poll_fds + i].events = G_IO_IN;
    }

    /* Convert GLib's timeout to nanoseconds; negative is passed on as -1. */
    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
    }

    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

    /* Both mutexes are released for the duration of the poll. */
    qemu_mutex_unlock_iothread();

    replay_mutex_unlock();

    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

    replay_mutex_lock();

    qemu_mutex_lock_iothread();
    if (g_poll_ret > 0) {
        /* Record all results first, then invoke the callbacks. */
        for (i = 0; i < w->num; i++) {
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
        }
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
            }
        }
    }

    /* Only the first n_poll_fds entries belong to GLib. */
    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

    g_main_context_release(context);

    return select_ret || g_poll_ret;
}
559  #endif
560  
/* Notifiers called by main_loop_wait() before and after each poll. */
static NotifierList main_loop_poll_notifiers =
    NOTIFIER_LIST_INITIALIZER(main_loop_poll_notifiers);
563  
564  void main_loop_poll_add_notifier(Notifier *notify)
565  {
566      notifier_list_add(&main_loop_poll_notifiers, notify);
567  }
568  
/* Remove @notify by passing it to notifier_remove(). */
void main_loop_poll_remove_notifier(Notifier *notify)
{
    notifier_remove(notify);
}
573  
574  void main_loop_wait(int nonblocking)
575  {
576      MainLoopPoll mlpoll = {
577          .state = MAIN_LOOP_POLL_FILL,
578          .timeout = UINT32_MAX,
579          .pollfds = gpollfds,
580      };
581      int ret;
582      int64_t timeout_ns;
583  
584      if (nonblocking) {
585          mlpoll.timeout = 0;
586      }
587  
588      /* poll any events */
589      g_array_set_size(gpollfds, 0); /* reset for new iteration */
590      /* XXX: separate device handlers from system ones */
591      notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
592  
593      if (mlpoll.timeout == UINT32_MAX) {
594          timeout_ns = -1;
595      } else {
596          timeout_ns = (uint64_t)mlpoll.timeout * (int64_t)(SCALE_MS);
597      }
598  
599      timeout_ns = qemu_soonest_timeout(timeout_ns,
600                                        timerlistgroup_deadline_ns(
601                                            &main_loop_tlg));
602  
603      ret = os_host_main_loop_wait(timeout_ns);
604      mlpoll.state = ret < 0 ? MAIN_LOOP_POLL_ERR : MAIN_LOOP_POLL_OK;
605      notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
606  
607      if (icount_enabled()) {
608          /*
609           * CPU thread can infinitely wait for event after
610           * missing the warp
611           */
612          icount_start_warp_timer();
613      }
614      qemu_clock_run_all_timers();
615  }
616  
617  /* Functions to operate on the main QEMU AioContext.  */
618  
619  QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
620  {
621      return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
622  }
623  
624  /*
625   * Functions to operate on the I/O handler AioContext.
626   * This context runs on top of main loop. We can't reuse qemu_aio_context
627   * because iohandlers mustn't be polled by aio_poll(qemu_aio_context).
628   */
/* Created on first use by iohandler_init(); NULL until then. */
static AioContext *iohandler_ctx;
630  
631  static void iohandler_init(void)
632  {
633      if (!iohandler_ctx) {
634          iohandler_ctx = aio_context_new(&error_abort);
635      }
636  }
637  
/* Return the I/O handler AioContext, creating it first if necessary. */
AioContext *iohandler_get_aio_context(void)
{
    iohandler_init();
    return iohandler_ctx;
}
643  
644  GSource *iohandler_get_g_source(void)
645  {
646      iohandler_init();
647      return aio_get_g_source(iohandler_ctx);
648  }
649  
650  void qemu_set_fd_handler(int fd,
651                           IOHandler *fd_read,
652                           IOHandler *fd_write,
653                           void *opaque)
654  {
655      iohandler_init();
656      aio_set_fd_handler(iohandler_ctx, fd, false,
657                         fd_read, fd_write, NULL, NULL, opaque);
658  }
659  
660  void event_notifier_set_handler(EventNotifier *e,
661                                  EventNotifierHandler *handler)
662  {
663      iohandler_init();
664      aio_set_event_notifier(iohandler_ctx, e, false,
665                             handler, NULL, NULL);
666  }
667