xref: /openbmc/qemu/util/qemu-timer.c (revision 99d423e5)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "sysemu/replay.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/cpus.h"
31 
32 #ifdef CONFIG_POSIX
33 #include <pthread.h>
34 #endif
35 
36 #ifdef CONFIG_PPOLL
37 #include <poll.h>
38 #endif
39 
40 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
41 #include <sys/prctl.h>
42 #endif
43 
44 /***********************************************************/
45 /* timers */
46 
/* Per-clock-type state shared by every timer list attached to that clock. */
typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    /* Notified by qemu_clock_get_ns() when QEMU_CLOCK_HOST is seen to jump */
    NotifierList reset_notifiers;
    /* Most recent value sampled for QEMU_CLOCK_HOST; starts at INT64_MIN */
    int64_t last;

    /* Which clock this object represents */
    QEMUClockType type;
    /* When false, timers on this clock are not run and report no deadline */
    bool enabled;
} QEMUClock;
57 
/* Default timer list group; timer_init_full() falls back to it when the
 * caller passes no group, and qemu_clock_init() fills in one list per clock.
 */
QEMUTimerListGroup main_loop_tlg;
/* Storage for every clock object, indexed by QEMUClockType. */
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
60 
/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, in order that
 * reenabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    /* Clock this list is attached to (set once in timerlist_new()) */
    QEMUClock *clock;
    /* Taken around every traversal or modification of active_timers */
    QemuMutex active_timers_lock;
    /* Head of the singly linked list of pending timers, kept sorted by
     * ascending expire_time; NULL when no timer is pending
     */
    QEMUTimer *active_timers;
    /* Linkage in clock->timerlists */
    QLIST_ENTRY(QEMUTimerList) list;
    /* Optional callback used by timerlist_notify(); when NULL,
     * qemu_notify_event() is called instead
     */
    QEMUTimerListNotifyCB *notify_cb;
    /* Opaque value handed to notify_cb */
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};
79 
80 /**
81  * qemu_clock_ptr:
82  * @type: type of clock
83  *
84  * Translate a clock type into a pointer to QEMUClock object.
85  *
86  * Returns: a pointer to the QEMUClock object
87  */
88 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
89 {
90     return &qemu_clocks[type];
91 }
92 
93 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
94 {
95     return timer_head && (timer_head->expire_time <= current_time);
96 }
97 
98 QEMUTimerList *timerlist_new(QEMUClockType type,
99                              QEMUTimerListNotifyCB *cb,
100                              void *opaque)
101 {
102     QEMUTimerList *timer_list;
103     QEMUClock *clock = qemu_clock_ptr(type);
104 
105     timer_list = g_malloc0(sizeof(QEMUTimerList));
106     qemu_event_init(&timer_list->timers_done_ev, true);
107     timer_list->clock = clock;
108     timer_list->notify_cb = cb;
109     timer_list->notify_opaque = opaque;
110     qemu_mutex_init(&timer_list->active_timers_lock);
111     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
112     return timer_list;
113 }
114 
115 void timerlist_free(QEMUTimerList *timer_list)
116 {
117     assert(!timerlist_has_timers(timer_list));
118     if (timer_list->clock) {
119         QLIST_REMOVE(timer_list, list);
120     }
121     qemu_mutex_destroy(&timer_list->active_timers_lock);
122     g_free(timer_list);
123 }
124 
125 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
126 {
127     QEMUClock *clock = qemu_clock_ptr(type);
128 
129     /* Assert that the clock of type TYPE has not been initialized yet. */
130     assert(main_loop_tlg.tl[type] == NULL);
131 
132     clock->type = type;
133     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
134     clock->last = INT64_MIN;
135     QLIST_INIT(&clock->timerlists);
136     notifier_list_init(&clock->reset_notifiers);
137     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
138 }
139 
140 bool qemu_clock_use_for_deadline(QEMUClockType type)
141 {
142     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
143 }
144 
145 void qemu_clock_notify(QEMUClockType type)
146 {
147     QEMUTimerList *timer_list;
148     QEMUClock *clock = qemu_clock_ptr(type);
149     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
150         timerlist_notify(timer_list);
151     }
152 }
153 
154 /* Disabling the clock will wait for related timerlists to stop
155  * executing qemu_run_timers.  Thus, this functions should not
156  * be used from the callback of a timer that is based on @clock.
157  * Doing so would cause a deadlock.
158  *
159  * Caller should hold BQL.
160  */
161 void qemu_clock_enable(QEMUClockType type, bool enabled)
162 {
163     QEMUClock *clock = qemu_clock_ptr(type);
164     QEMUTimerList *tl;
165     bool old = clock->enabled;
166     clock->enabled = enabled;
167     if (enabled && !old) {
168         qemu_clock_notify(type);
169     } else if (!enabled && old) {
170         QLIST_FOREACH(tl, &clock->timerlists, list) {
171             qemu_event_wait(&tl->timers_done_ev);
172         }
173     }
174 }
175 
176 bool timerlist_has_timers(QEMUTimerList *timer_list)
177 {
178     return !!atomic_read(&timer_list->active_timers);
179 }
180 
181 bool qemu_clock_has_timers(QEMUClockType type)
182 {
183     return timerlist_has_timers(
184         main_loop_tlg.tl[type]);
185 }
186 
187 bool timerlist_expired(QEMUTimerList *timer_list)
188 {
189     int64_t expire_time;
190 
191     if (!atomic_read(&timer_list->active_timers)) {
192         return false;
193     }
194 
195     qemu_mutex_lock(&timer_list->active_timers_lock);
196     if (!timer_list->active_timers) {
197         qemu_mutex_unlock(&timer_list->active_timers_lock);
198         return false;
199     }
200     expire_time = timer_list->active_timers->expire_time;
201     qemu_mutex_unlock(&timer_list->active_timers_lock);
202 
203     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
204 }
205 
206 bool qemu_clock_expired(QEMUClockType type)
207 {
208     return timerlist_expired(
209         main_loop_tlg.tl[type]);
210 }
211 
212 /*
213  * As above, but return -1 for no deadline, and do not cap to 2^32
214  * as we know the result is always positive.
215  */
216 
217 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
218 {
219     int64_t delta;
220     int64_t expire_time;
221 
222     if (!atomic_read(&timer_list->active_timers)) {
223         return -1;
224     }
225 
226     if (!timer_list->clock->enabled) {
227         return -1;
228     }
229 
230     /* The active timers list may be modified before the caller uses our return
231      * value but ->notify_cb() is called when the deadline changes.  Therefore
232      * the caller should notice the change and there is no race condition.
233      */
234     qemu_mutex_lock(&timer_list->active_timers_lock);
235     if (!timer_list->active_timers) {
236         qemu_mutex_unlock(&timer_list->active_timers_lock);
237         return -1;
238     }
239     expire_time = timer_list->active_timers->expire_time;
240     qemu_mutex_unlock(&timer_list->active_timers_lock);
241 
242     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
243 
244     if (delta <= 0) {
245         return 0;
246     }
247 
248     return delta;
249 }
250 
251 /* Calculate the soonest deadline across all timerlists attached
252  * to the clock. This is used for the icount timeout so we
253  * ignore whether or not the clock should be used in deadline
254  * calculations.
255  */
256 int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
257 {
258     int64_t deadline = -1;
259     QEMUTimerList *timer_list;
260     QEMUClock *clock = qemu_clock_ptr(type);
261     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
262         deadline = qemu_soonest_timeout(deadline,
263                                         timerlist_deadline_ns(timer_list));
264     }
265     return deadline;
266 }
267 
268 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
269 {
270     return timer_list->clock->type;
271 }
272 
273 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
274 {
275     return main_loop_tlg.tl[type];
276 }
277 
278 void timerlist_notify(QEMUTimerList *timer_list)
279 {
280     if (timer_list->notify_cb) {
281         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
282     } else {
283         qemu_notify_event();
284     }
285 }
286 
/* Transition function to convert a nanosecond timeout to ms.
 * This is used where a system does not support ppoll.
 *
 * @ns: timeout in nanoseconds; any negative value means "no timeout"
 *
 * Returns: -1 if @ns is negative, 0 if @ns is zero, otherwise @ns rounded
 * up to whole milliseconds and clamped to INT32_MAX.
 */
int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;

    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait.  The rounding is expressed as
     * quotient plus a remainder test rather than (ns + SCALE_MS - 1) / SCALE_MS
     * so that values of @ns close to INT64_MAX cannot overflow.
     */
    ms = ns / SCALE_MS + (ns % SCALE_MS != 0);

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > (int64_t) INT32_MAX) {
        ms = INT32_MAX;
    }

    return (int) ms;
}
313 
314 
315 /* qemu implementation of g_poll which uses a nanosecond timeout but is
316  * otherwise identical to g_poll
317  */
318 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
319 {
320 #ifdef CONFIG_PPOLL
321     if (timeout < 0) {
322         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
323     } else {
324         struct timespec ts;
325         int64_t tvsec = timeout / 1000000000LL;
326         /* Avoid possibly overflowing and specifying a negative number of
327          * seconds, which would turn a very long timeout into a busy-wait.
328          */
329         if (tvsec > (int64_t)INT32_MAX) {
330             tvsec = INT32_MAX;
331         }
332         ts.tv_sec = tvsec;
333         ts.tv_nsec = timeout % 1000000000LL;
334         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
335     }
336 #else
337     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
338 #endif
339 }
340 
341 
342 void timer_init_full(QEMUTimer *ts,
343                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
344                      int scale, int attributes,
345                      QEMUTimerCB *cb, void *opaque)
346 {
347     if (!timer_list_group) {
348         timer_list_group = &main_loop_tlg;
349     }
350     ts->timer_list = timer_list_group->tl[type];
351     ts->cb = cb;
352     ts->opaque = opaque;
353     ts->scale = scale;
354     ts->attributes = attributes;
355     ts->expire_time = -1;
356 }
357 
358 void timer_deinit(QEMUTimer *ts)
359 {
360     assert(ts->expire_time == -1);
361     ts->timer_list = NULL;
362 }
363 
364 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
365 {
366     QEMUTimer **pt, *t;
367 
368     ts->expire_time = -1;
369     pt = &timer_list->active_timers;
370     for(;;) {
371         t = *pt;
372         if (!t)
373             break;
374         if (t == ts) {
375             atomic_set(pt, t->next);
376             break;
377         }
378         pt = &t->next;
379     }
380 }
381 
382 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
383                                 QEMUTimer *ts, int64_t expire_time)
384 {
385     QEMUTimer **pt, *t;
386 
387     /* add the timer in the sorted list */
388     pt = &timer_list->active_timers;
389     for (;;) {
390         t = *pt;
391         if (!timer_expired_ns(t, expire_time)) {
392             break;
393         }
394         pt = &t->next;
395     }
396     ts->expire_time = MAX(expire_time, 0);
397     ts->next = *pt;
398     atomic_set(pt, ts);
399 
400     return pt == &timer_list->active_timers;
401 }
402 
403 static void timerlist_rearm(QEMUTimerList *timer_list)
404 {
405     /* Interrupt execution to force deadline recalculation.  */
406     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
407         qemu_start_warp_timer();
408     }
409     timerlist_notify(timer_list);
410 }
411 
412 /* stop a timer, but do not dealloc it */
413 void timer_del(QEMUTimer *ts)
414 {
415     QEMUTimerList *timer_list = ts->timer_list;
416 
417     if (timer_list) {
418         qemu_mutex_lock(&timer_list->active_timers_lock);
419         timer_del_locked(timer_list, ts);
420         qemu_mutex_unlock(&timer_list->active_timers_lock);
421     }
422 }
423 
424 /* modify the current timer so that it will be fired when current_time
425    >= expire_time. The corresponding callback will be called. */
426 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
427 {
428     QEMUTimerList *timer_list = ts->timer_list;
429     bool rearm;
430 
431     qemu_mutex_lock(&timer_list->active_timers_lock);
432     timer_del_locked(timer_list, ts);
433     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
434     qemu_mutex_unlock(&timer_list->active_timers_lock);
435 
436     if (rearm) {
437         timerlist_rearm(timer_list);
438     }
439 }
440 
441 /* modify the current timer so that it will be fired when current_time
442    >= expire_time or the current deadline, whichever comes earlier.
443    The corresponding callback will be called. */
444 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
445 {
446     QEMUTimerList *timer_list = ts->timer_list;
447     bool rearm;
448 
449     qemu_mutex_lock(&timer_list->active_timers_lock);
450     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
451         if (ts->expire_time != -1) {
452             timer_del_locked(timer_list, ts);
453         }
454         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
455     } else {
456         rearm = false;
457     }
458     qemu_mutex_unlock(&timer_list->active_timers_lock);
459 
460     if (rearm) {
461         timerlist_rearm(timer_list);
462     }
463 }
464 
465 void timer_mod(QEMUTimer *ts, int64_t expire_time)
466 {
467     timer_mod_ns(ts, expire_time * ts->scale);
468 }
469 
470 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
471 {
472     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
473 }
474 
475 bool timer_pending(QEMUTimer *ts)
476 {
477     return ts->expire_time >= 0;
478 }
479 
480 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
481 {
482     return timer_expired_ns(timer_head, current_time * timer_head->scale);
483 }
484 
/* Run the callbacks of all timers on @timer_list that have expired.
 *
 * The list's "timers done" event is reset while this function is working
 * and set again on every exit path past the initial empty-list check, so
 * that qemu_clock_enable() can wait for a run in progress to finish.
 * Nothing is run when the list's clock is disabled or when a replay
 * checkpoint refuses to proceed.
 *
 * Returns: true if at least one timer callback was invoked.
 */
bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;
    bool need_replay_checkpoint = false;

    /* Lock-free fast path: nothing pending, nothing to do. */
    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (replay_mode != REPLAY_MODE_NONE) {
            /* Checkpoint for virtual clock is redundant in cases where
             * it's being triggered with only non-EXTERNAL timers, because
             * these timers don't change guest state directly.
             * Since it has conditional dependence on specific timers, it is
             * subject to race conditions and requires special handling.
             * See below.
             */
            need_replay_checkpoint = true;
        }
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for virtual clock.  The
     * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
     * and that must only be done once since the clock value stays the same. Because
     * non-EXTERNAL timers may appear in the timers list while it is being processed,
     * the checkpoint can be issued at any time until no timers are left and we are
     * done.
     */
    /* The clock is sampled once; timers armed by callbacks are compared
     * against this same value.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left.  The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        if (need_replay_checkpoint
                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
            /* once we got here, checkpoint clock only once */
            need_replay_checkpoint = false;
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
                /* The lock is not held here, matching the "out" label. */
                goto out;
            }
            qemu_mutex_lock(&timer_list->active_timers_lock);
            /* The lock was released; start over again in case the list was
             * modified.
             */
            continue;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        /* Copy the callback and its argument while the lock is still held. */
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}
585 
586 bool qemu_clock_run_timers(QEMUClockType type)
587 {
588     return timerlist_run_timers(main_loop_tlg.tl[type]);
589 }
590 
591 void timerlistgroup_init(QEMUTimerListGroup *tlg,
592                          QEMUTimerListNotifyCB *cb, void *opaque)
593 {
594     QEMUClockType type;
595     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
596         tlg->tl[type] = timerlist_new(type, cb, opaque);
597     }
598 }
599 
600 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
601 {
602     QEMUClockType type;
603     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
604         timerlist_free(tlg->tl[type]);
605     }
606 }
607 
608 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
609 {
610     QEMUClockType type;
611     bool progress = false;
612     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
613         progress |= timerlist_run_timers(tlg->tl[type]);
614     }
615     return progress;
616 }
617 
618 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
619 {
620     int64_t deadline = -1;
621     QEMUClockType type;
622     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
623         if (qemu_clock_use_for_deadline(type)) {
624             deadline = qemu_soonest_timeout(deadline,
625                                             timerlist_deadline_ns(tlg->tl[type]));
626         }
627     }
628     return deadline;
629 }
630 
631 int64_t qemu_clock_get_ns(QEMUClockType type)
632 {
633     int64_t now, last;
634     QEMUClock *clock = qemu_clock_ptr(type);
635 
636     switch (type) {
637     case QEMU_CLOCK_REALTIME:
638         return get_clock();
639     default:
640     case QEMU_CLOCK_VIRTUAL:
641         if (use_icount) {
642             return cpu_get_icount();
643         } else {
644             return cpu_get_clock();
645         }
646     case QEMU_CLOCK_HOST:
647         now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
648         last = clock->last;
649         clock->last = now;
650         if (now < last || now > (last + get_max_clock_jump())) {
651             notifier_list_notify(&clock->reset_notifiers, &now);
652         }
653         return now;
654     case QEMU_CLOCK_VIRTUAL_RT:
655         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
656     }
657 }
658 
659 uint64_t qemu_clock_get_last(QEMUClockType type)
660 {
661     QEMUClock *clock = qemu_clock_ptr(type);
662     return clock->last;
663 }
664 
665 void qemu_clock_set_last(QEMUClockType type, uint64_t last)
666 {
667     QEMUClock *clock = qemu_clock_ptr(type);
668     clock->last = last;
669 }
670 
671 void qemu_clock_register_reset_notifier(QEMUClockType type,
672                                         Notifier *notifier)
673 {
674     QEMUClock *clock = qemu_clock_ptr(type);
675     notifier_list_add(&clock->reset_notifiers, notifier);
676 }
677 
678 void qemu_clock_unregister_reset_notifier(QEMUClockType type,
679                                           Notifier *notifier)
680 {
681     notifier_remove(notifier);
682 }
683 
684 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
685 {
686     QEMUClockType type;
687     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
688         qemu_clock_init(type, notify_cb);
689     }
690 
691 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
692     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
693 #endif
694 }
695 
696 uint64_t timer_expire_time_ns(QEMUTimer *ts)
697 {
698     return timer_pending(ts) ? ts->expire_time : -1;
699 }
700 
701 bool qemu_clock_run_all_timers(void)
702 {
703     bool progress = false;
704     QEMUClockType type;
705 
706     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
707         if (qemu_clock_use_for_deadline(type)) {
708             progress |= qemu_clock_run_timers(type);
709         }
710     }
711 
712     return progress;
713 }
714