xref: /openbmc/qemu/util/qemu-timer.c (revision 59a3a1c0)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "sysemu/replay.h"
29 #include "sysemu/cpus.h"
30 
31 #ifdef CONFIG_POSIX
32 #include <pthread.h>
33 #endif
34 
35 #ifdef CONFIG_PPOLL
36 #include <poll.h>
37 #endif
38 
39 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
40 #include <sys/prctl.h>
41 #endif
42 
43 /***********************************************************/
44 /* timers */
45 
/* Per-clock-type state shared by all timer lists attached to that clock. */
typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    /* Notified by qemu_clock_get_ns() when a QEMU_CLOCK_HOST reading goes
     * backwards or jumps forward by more than get_max_clock_jump().
     */
    NotifierList reset_notifiers;
    /* Last value stored by qemu_clock_get_ns() for QEMU_CLOCK_HOST or by
     * qemu_clock_set_last(); initialized to INT64_MIN.
     */
    int64_t last;

    /* Clock type; equals this clock's index in qemu_clocks[]. */
    QEMUClockType type;
    /* While false, timerlist_run_timers() runs no callbacks and
     * timerlist_deadline_ns() reports no deadline (-1).
     */
    bool enabled;
} QEMUClock;
56 
/* Default timer list group: one list per clock type, created by
 * qemu_clock_init() and used by timer_init_full() when no group is given.
 */
QEMUTimerListGroup main_loop_tlg;
/* Clock state, indexed by QEMUClockType (see qemu_clock_ptr()). */
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
59 
60 /* A QEMUTimerList is a list of timers attached to a clock. More
61  * than one QEMUTimerList can be attached to each clock, for instance
62  * used by different AioContexts / threads. Each clock also has
63  * a list of the QEMUTimerLists associated with it, in order that
64  * reenabling the clock can call all the notifiers.
65  */
66 
/* One list of pending timers attached to a clock. */
struct QEMUTimerList {
    /* Clock this list is attached to; set by timerlist_new(). */
    QEMUClock *clock;
    /* Taken around every modification of active_timers and of the
     * timers' next links.
     */
    QemuMutex active_timers_lock;
    /* Head of the singly linked list of pending timers, in the order
     * built by timer_mod_ns_locked().  The head is also read without the
     * lock through atomic_read().
     */
    QEMUTimer *active_timers;
    /* Link in clock->timerlists. */
    QLIST_ENTRY(QEMUTimerList) list;
    /* Called by timerlist_notify(); when NULL, qemu_notify_event() is
     * called instead.
     */
    QEMUTimerListNotifyCB *notify_cb;
    /* First argument passed to notify_cb. */
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};
78 
79 /**
80  * qemu_clock_ptr:
81  * @type: type of clock
82  *
83  * Translate a clock type into a pointer to QEMUClock object.
84  *
85  * Returns: a pointer to the QEMUClock object
86  */
87 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
88 {
89     return &qemu_clocks[type];
90 }
91 
92 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
93 {
94     return timer_head && (timer_head->expire_time <= current_time);
95 }
96 
97 QEMUTimerList *timerlist_new(QEMUClockType type,
98                              QEMUTimerListNotifyCB *cb,
99                              void *opaque)
100 {
101     QEMUTimerList *timer_list;
102     QEMUClock *clock = qemu_clock_ptr(type);
103 
104     timer_list = g_malloc0(sizeof(QEMUTimerList));
105     qemu_event_init(&timer_list->timers_done_ev, true);
106     timer_list->clock = clock;
107     timer_list->notify_cb = cb;
108     timer_list->notify_opaque = opaque;
109     qemu_mutex_init(&timer_list->active_timers_lock);
110     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
111     return timer_list;
112 }
113 
114 void timerlist_free(QEMUTimerList *timer_list)
115 {
116     assert(!timerlist_has_timers(timer_list));
117     if (timer_list->clock) {
118         QLIST_REMOVE(timer_list, list);
119     }
120     qemu_mutex_destroy(&timer_list->active_timers_lock);
121     g_free(timer_list);
122 }
123 
124 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
125 {
126     QEMUClock *clock = qemu_clock_ptr(type);
127 
128     /* Assert that the clock of type TYPE has not been initialized yet. */
129     assert(main_loop_tlg.tl[type] == NULL);
130 
131     clock->type = type;
132     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
133     clock->last = INT64_MIN;
134     QLIST_INIT(&clock->timerlists);
135     notifier_list_init(&clock->reset_notifiers);
136     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
137 }
138 
139 bool qemu_clock_use_for_deadline(QEMUClockType type)
140 {
141     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
142 }
143 
144 void qemu_clock_notify(QEMUClockType type)
145 {
146     QEMUTimerList *timer_list;
147     QEMUClock *clock = qemu_clock_ptr(type);
148     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
149         timerlist_notify(timer_list);
150     }
151 }
152 
153 /* Disabling the clock will wait for related timerlists to stop
154  * executing qemu_run_timers.  Thus, this functions should not
155  * be used from the callback of a timer that is based on @clock.
156  * Doing so would cause a deadlock.
157  *
158  * Caller should hold BQL.
159  */
160 void qemu_clock_enable(QEMUClockType type, bool enabled)
161 {
162     QEMUClock *clock = qemu_clock_ptr(type);
163     QEMUTimerList *tl;
164     bool old = clock->enabled;
165     clock->enabled = enabled;
166     if (enabled && !old) {
167         qemu_clock_notify(type);
168     } else if (!enabled && old) {
169         QLIST_FOREACH(tl, &clock->timerlists, list) {
170             qemu_event_wait(&tl->timers_done_ev);
171         }
172     }
173 }
174 
175 bool timerlist_has_timers(QEMUTimerList *timer_list)
176 {
177     return !!atomic_read(&timer_list->active_timers);
178 }
179 
180 bool qemu_clock_has_timers(QEMUClockType type)
181 {
182     return timerlist_has_timers(
183         main_loop_tlg.tl[type]);
184 }
185 
186 bool timerlist_expired(QEMUTimerList *timer_list)
187 {
188     int64_t expire_time;
189 
190     if (!atomic_read(&timer_list->active_timers)) {
191         return false;
192     }
193 
194     qemu_mutex_lock(&timer_list->active_timers_lock);
195     if (!timer_list->active_timers) {
196         qemu_mutex_unlock(&timer_list->active_timers_lock);
197         return false;
198     }
199     expire_time = timer_list->active_timers->expire_time;
200     qemu_mutex_unlock(&timer_list->active_timers_lock);
201 
202     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
203 }
204 
205 bool qemu_clock_expired(QEMUClockType type)
206 {
207     return timerlist_expired(
208         main_loop_tlg.tl[type]);
209 }
210 
211 /*
212  * As above, but return -1 for no deadline, and do not cap to 2^32
213  * as we know the result is always positive.
214  */
215 
216 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
217 {
218     int64_t delta;
219     int64_t expire_time;
220 
221     if (!atomic_read(&timer_list->active_timers)) {
222         return -1;
223     }
224 
225     if (!timer_list->clock->enabled) {
226         return -1;
227     }
228 
229     /* The active timers list may be modified before the caller uses our return
230      * value but ->notify_cb() is called when the deadline changes.  Therefore
231      * the caller should notice the change and there is no race condition.
232      */
233     qemu_mutex_lock(&timer_list->active_timers_lock);
234     if (!timer_list->active_timers) {
235         qemu_mutex_unlock(&timer_list->active_timers_lock);
236         return -1;
237     }
238     expire_time = timer_list->active_timers->expire_time;
239     qemu_mutex_unlock(&timer_list->active_timers_lock);
240 
241     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
242 
243     if (delta <= 0) {
244         return 0;
245     }
246 
247     return delta;
248 }
249 
250 /* Calculate the soonest deadline across all timerlists attached
251  * to the clock. This is used for the icount timeout so we
252  * ignore whether or not the clock should be used in deadline
253  * calculations.
254  */
255 int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
256 {
257     int64_t deadline = -1;
258     QEMUTimerList *timer_list;
259     QEMUClock *clock = qemu_clock_ptr(type);
260     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
261         deadline = qemu_soonest_timeout(deadline,
262                                         timerlist_deadline_ns(timer_list));
263     }
264     return deadline;
265 }
266 
267 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
268 {
269     return timer_list->clock->type;
270 }
271 
272 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
273 {
274     return main_loop_tlg.tl[type];
275 }
276 
277 void timerlist_notify(QEMUTimerList *timer_list)
278 {
279     if (timer_list->notify_cb) {
280         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
281     } else {
282         qemu_notify_event();
283     }
284 }
285 
/* Transition function to convert a nanosecond timeout to ms
 * This is used where a system does not support ppoll
 *
 * @ns: timeout in nanoseconds; any negative value means "no timeout".
 *
 * Returns -1 for a negative @ns, 0 for 0, otherwise @ns rounded up to
 * whole milliseconds and capped at INT32_MAX.
 */
int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;

    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait.  The rounding is applied to the
     * quotient instead of adding SCALE_MS - 1 to @ns first, because that
     * addition overflows int64_t when @ns is close to INT64_MAX.
     */
    ms = ns / SCALE_MS;
    if (ns % SCALE_MS) {
        ms++;
    }

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > (int64_t) INT32_MAX) {
        ms = INT32_MAX;
    }

    return (int) ms;
}
312 
313 
314 /* qemu implementation of g_poll which uses a nanosecond timeout but is
315  * otherwise identical to g_poll
316  */
317 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
318 {
319 #ifdef CONFIG_PPOLL
320     if (timeout < 0) {
321         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
322     } else {
323         struct timespec ts;
324         int64_t tvsec = timeout / 1000000000LL;
325         /* Avoid possibly overflowing and specifying a negative number of
326          * seconds, which would turn a very long timeout into a busy-wait.
327          */
328         if (tvsec > (int64_t)INT32_MAX) {
329             tvsec = INT32_MAX;
330         }
331         ts.tv_sec = tvsec;
332         ts.tv_nsec = timeout % 1000000000LL;
333         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
334     }
335 #else
336     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
337 #endif
338 }
339 
340 
341 void timer_init_full(QEMUTimer *ts,
342                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
343                      int scale, int attributes,
344                      QEMUTimerCB *cb, void *opaque)
345 {
346     if (!timer_list_group) {
347         timer_list_group = &main_loop_tlg;
348     }
349     ts->timer_list = timer_list_group->tl[type];
350     ts->cb = cb;
351     ts->opaque = opaque;
352     ts->scale = scale;
353     ts->attributes = attributes;
354     ts->expire_time = -1;
355 }
356 
/*
 * Detach @ts from its timer list.  The timer must not be pending: its
 * expire_time has to be -1, which is asserted.
 */
void timer_deinit(QEMUTimer *ts)
{
    assert(ts->expire_time == -1);
    ts->timer_list = NULL;
}
362 
363 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
364 {
365     QEMUTimer **pt, *t;
366 
367     ts->expire_time = -1;
368     pt = &timer_list->active_timers;
369     for(;;) {
370         t = *pt;
371         if (!t)
372             break;
373         if (t == ts) {
374             atomic_set(pt, t->next);
375             break;
376         }
377         pt = &t->next;
378     }
379 }
380 
381 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
382                                 QEMUTimer *ts, int64_t expire_time)
383 {
384     QEMUTimer **pt, *t;
385 
386     /* add the timer in the sorted list */
387     pt = &timer_list->active_timers;
388     for (;;) {
389         t = *pt;
390         if (!timer_expired_ns(t, expire_time)) {
391             break;
392         }
393         pt = &t->next;
394     }
395     ts->expire_time = MAX(expire_time, 0);
396     ts->next = *pt;
397     atomic_set(pt, ts);
398 
399     return pt == &timer_list->active_timers;
400 }
401 
402 static void timerlist_rearm(QEMUTimerList *timer_list)
403 {
404     /* Interrupt execution to force deadline recalculation.  */
405     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
406         qemu_start_warp_timer();
407     }
408     timerlist_notify(timer_list);
409 }
410 
411 /* stop a timer, but do not dealloc it */
412 void timer_del(QEMUTimer *ts)
413 {
414     QEMUTimerList *timer_list = ts->timer_list;
415 
416     if (timer_list) {
417         qemu_mutex_lock(&timer_list->active_timers_lock);
418         timer_del_locked(timer_list, ts);
419         qemu_mutex_unlock(&timer_list->active_timers_lock);
420     }
421 }
422 
423 /* modify the current timer so that it will be fired when current_time
424    >= expire_time. The corresponding callback will be called. */
425 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
426 {
427     QEMUTimerList *timer_list = ts->timer_list;
428     bool rearm;
429 
430     qemu_mutex_lock(&timer_list->active_timers_lock);
431     timer_del_locked(timer_list, ts);
432     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
433     qemu_mutex_unlock(&timer_list->active_timers_lock);
434 
435     if (rearm) {
436         timerlist_rearm(timer_list);
437     }
438 }
439 
440 /* modify the current timer so that it will be fired when current_time
441    >= expire_time or the current deadline, whichever comes earlier.
442    The corresponding callback will be called. */
443 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
444 {
445     QEMUTimerList *timer_list = ts->timer_list;
446     bool rearm;
447 
448     qemu_mutex_lock(&timer_list->active_timers_lock);
449     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
450         if (ts->expire_time != -1) {
451             timer_del_locked(timer_list, ts);
452         }
453         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
454     } else {
455         rearm = false;
456     }
457     qemu_mutex_unlock(&timer_list->active_timers_lock);
458 
459     if (rearm) {
460         timerlist_rearm(timer_list);
461     }
462 }
463 
464 void timer_mod(QEMUTimer *ts, int64_t expire_time)
465 {
466     timer_mod_ns(ts, expire_time * ts->scale);
467 }
468 
469 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
470 {
471     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
472 }
473 
474 bool timer_pending(QEMUTimer *ts)
475 {
476     return ts->expire_time >= 0;
477 }
478 
479 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
480 {
481     return timer_expired_ns(timer_head, current_time * timer_head->scale);
482 }
483 
/*
 * Run the callbacks of all timers on @timer_list that have expired
 * according to the current value of the list's clock.
 *
 * timers_done_ev is reset once the list is known to be non-empty and is
 * set again on every exit path after that; qemu_clock_enable() waits on
 * it when disabling a clock.  No callback is run if the clock is
 * disabled or if replay_checkpoint() returns false.
 *
 * Callbacks are invoked with active_timers_lock released, so they may
 * modify the timer list.
 *
 * Returns: true if at least one callback was invoked.
 */
bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;
    bool need_replay_checkpoint = false;

    /* Lock-free fast path: nothing to do for an empty list. */
    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (replay_mode != REPLAY_MODE_NONE) {
            /* Checkpoint for virtual clock is redundant in cases where
             * it's being triggered with only non-EXTERNAL timers, because
             * these timers don't change guest state directly.
             * Since it has conditional dependence on specific timers, it is
             * subject to race conditions and requires special handling.
             * See below.
             */
            need_replay_checkpoint = true;
        }
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from the active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for virtual clock.  The
     * checkpoint must be recorded/replayed before processing any non-EXTERNAL
     * timer, and that must only be done once since the clock value stays the
     * same.  Because non-EXTERNAL timers may appear in the timers list while
     * it is being processed, the checkpoint can be issued at any time until
     * no timers are left and we are done.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left.  The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        if (need_replay_checkpoint
                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
            /* once we got here, checkpoint clock only once */
            need_replay_checkpoint = false;
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
                /* The lock is not held here, so "out" must not unlock. */
                goto out;
            }
            qemu_mutex_lock(&timer_list->active_timers_lock);
            /* The lock was released; start over again in case the list was
             * modified.
             */
            continue;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        /* Copy the callback and its argument while the lock is held. */
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}
584 
585 bool qemu_clock_run_timers(QEMUClockType type)
586 {
587     return timerlist_run_timers(main_loop_tlg.tl[type]);
588 }
589 
590 void timerlistgroup_init(QEMUTimerListGroup *tlg,
591                          QEMUTimerListNotifyCB *cb, void *opaque)
592 {
593     QEMUClockType type;
594     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
595         tlg->tl[type] = timerlist_new(type, cb, opaque);
596     }
597 }
598 
599 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
600 {
601     QEMUClockType type;
602     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
603         timerlist_free(tlg->tl[type]);
604     }
605 }
606 
607 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
608 {
609     QEMUClockType type;
610     bool progress = false;
611     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
612         progress |= timerlist_run_timers(tlg->tl[type]);
613     }
614     return progress;
615 }
616 
617 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
618 {
619     int64_t deadline = -1;
620     QEMUClockType type;
621     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
622         if (qemu_clock_use_for_deadline(type)) {
623             deadline = qemu_soonest_timeout(deadline,
624                                             timerlist_deadline_ns(tlg->tl[type]));
625         }
626     }
627     return deadline;
628 }
629 
630 int64_t qemu_clock_get_ns(QEMUClockType type)
631 {
632     int64_t now, last;
633     QEMUClock *clock = qemu_clock_ptr(type);
634 
635     switch (type) {
636     case QEMU_CLOCK_REALTIME:
637         return get_clock();
638     default:
639     case QEMU_CLOCK_VIRTUAL:
640         if (use_icount) {
641             return cpu_get_icount();
642         } else {
643             return cpu_get_clock();
644         }
645     case QEMU_CLOCK_HOST:
646         now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
647         last = clock->last;
648         clock->last = now;
649         if (now < last || now > (last + get_max_clock_jump())) {
650             notifier_list_notify(&clock->reset_notifiers, &now);
651         }
652         return now;
653     case QEMU_CLOCK_VIRTUAL_RT:
654         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
655     }
656 }
657 
658 uint64_t qemu_clock_get_last(QEMUClockType type)
659 {
660     QEMUClock *clock = qemu_clock_ptr(type);
661     return clock->last;
662 }
663 
664 void qemu_clock_set_last(QEMUClockType type, uint64_t last)
665 {
666     QEMUClock *clock = qemu_clock_ptr(type);
667     clock->last = last;
668 }
669 
670 void qemu_clock_register_reset_notifier(QEMUClockType type,
671                                         Notifier *notifier)
672 {
673     QEMUClock *clock = qemu_clock_ptr(type);
674     notifier_list_add(&clock->reset_notifiers, notifier);
675 }
676 
/*
 * Remove @notifier with notifier_remove().  @type is not used: only the
 * notifier itself is needed for the removal.
 */
void qemu_clock_unregister_reset_notifier(QEMUClockType type,
                                          Notifier *notifier)
{
    notifier_remove(notifier);
}
682 
683 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
684 {
685     QEMUClockType type;
686     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
687         qemu_clock_init(type, notify_cb);
688     }
689 
690 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
691     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
692 #endif
693 }
694 
695 uint64_t timer_expire_time_ns(QEMUTimer *ts)
696 {
697     return timer_pending(ts) ? ts->expire_time : -1;
698 }
699 
700 bool qemu_clock_run_all_timers(void)
701 {
702     bool progress = false;
703     QEMUClockType type;
704 
705     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
706         if (qemu_clock_use_for_deadline(type)) {
707             progress |= qemu_clock_run_timers(type);
708         }
709     }
710 
711     return progress;
712 }
713