xref: /openbmc/qemu/util/qemu-timer.c (revision 892609056ddff373f8c8c55525a53dd932ee403d)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "sysemu/replay.h"
29 #include "sysemu/cpus.h"
30 
31 #ifdef CONFIG_POSIX
32 #include <pthread.h>
33 #endif
34 
35 #ifdef CONFIG_PPOLL
36 #include <poll.h>
37 #endif
38 
39 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
40 #include <sys/prctl.h>
41 #endif
42 
43 /***********************************************************/
44 /* timers */
45 
/* Per-clock-type state; one entry of qemu_clocks[] exists for each clock type. */
typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    /* Clock type this entry stands for; assigned in qemu_clock_init() */
    QEMUClockType type;
    /* Current enable state; changed through qemu_clock_enable() */
    bool enabled;
} QEMUClock;
53 
/* Timer lists of the main loop, one per clock type; filled in by qemu_clock_init() */
QEMUTimerListGroup main_loop_tlg;
/* Clock state, indexed by QEMUClockType (see qemu_clock_ptr()) */
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
56 
/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, so that
 * re-enabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    /* clock this list is attached to */
    QEMUClock *clock;
    /* held while the active_timers list is modified or walked */
    QemuMutex active_timers_lock;
    /* head of the pending timers, kept sorted by ascending expire_time */
    QEMUTimer *active_timers;
    /* link in clock->timerlists */
    QLIST_ENTRY(QEMUTimerList) list;
    /* invoked by timerlist_notify(); when NULL, qemu_notify_event() is used */
    QEMUTimerListNotifyCB *notify_cb;
    /* first argument handed to notify_cb */
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};
75 
76 /**
77  * qemu_clock_ptr:
78  * @type: type of clock
79  *
80  * Translate a clock type into a pointer to QEMUClock object.
81  *
82  * Returns: a pointer to the QEMUClock object
83  */
84 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
85 {
86     return &qemu_clocks[type];
87 }
88 
89 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
90 {
91     return timer_head && (timer_head->expire_time <= current_time);
92 }
93 
94 QEMUTimerList *timerlist_new(QEMUClockType type,
95                              QEMUTimerListNotifyCB *cb,
96                              void *opaque)
97 {
98     QEMUTimerList *timer_list;
99     QEMUClock *clock = qemu_clock_ptr(type);
100 
101     timer_list = g_malloc0(sizeof(QEMUTimerList));
102     qemu_event_init(&timer_list->timers_done_ev, true);
103     timer_list->clock = clock;
104     timer_list->notify_cb = cb;
105     timer_list->notify_opaque = opaque;
106     qemu_mutex_init(&timer_list->active_timers_lock);
107     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
108     return timer_list;
109 }
110 
111 void timerlist_free(QEMUTimerList *timer_list)
112 {
113     assert(!timerlist_has_timers(timer_list));
114     if (timer_list->clock) {
115         QLIST_REMOVE(timer_list, list);
116     }
117     qemu_mutex_destroy(&timer_list->active_timers_lock);
118     g_free(timer_list);
119 }
120 
121 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
122 {
123     QEMUClock *clock = qemu_clock_ptr(type);
124 
125     /* Assert that the clock of type TYPE has not been initialized yet. */
126     assert(main_loop_tlg.tl[type] == NULL);
127 
128     clock->type = type;
129     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
130     QLIST_INIT(&clock->timerlists);
131     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
132 }
133 
134 bool qemu_clock_use_for_deadline(QEMUClockType type)
135 {
136     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
137 }
138 
139 void qemu_clock_notify(QEMUClockType type)
140 {
141     QEMUTimerList *timer_list;
142     QEMUClock *clock = qemu_clock_ptr(type);
143     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
144         timerlist_notify(timer_list);
145     }
146 }
147 
148 /* Disabling the clock will wait for related timerlists to stop
149  * executing qemu_run_timers.  Thus, this functions should not
150  * be used from the callback of a timer that is based on @clock.
151  * Doing so would cause a deadlock.
152  *
153  * Caller should hold BQL.
154  */
155 void qemu_clock_enable(QEMUClockType type, bool enabled)
156 {
157     QEMUClock *clock = qemu_clock_ptr(type);
158     QEMUTimerList *tl;
159     bool old = clock->enabled;
160     clock->enabled = enabled;
161     if (enabled && !old) {
162         qemu_clock_notify(type);
163     } else if (!enabled && old) {
164         QLIST_FOREACH(tl, &clock->timerlists, list) {
165             qemu_event_wait(&tl->timers_done_ev);
166         }
167     }
168 }
169 
170 bool timerlist_has_timers(QEMUTimerList *timer_list)
171 {
172     return !!atomic_read(&timer_list->active_timers);
173 }
174 
175 bool qemu_clock_has_timers(QEMUClockType type)
176 {
177     return timerlist_has_timers(
178         main_loop_tlg.tl[type]);
179 }
180 
181 bool timerlist_expired(QEMUTimerList *timer_list)
182 {
183     int64_t expire_time;
184 
185     if (!atomic_read(&timer_list->active_timers)) {
186         return false;
187     }
188 
189     qemu_mutex_lock(&timer_list->active_timers_lock);
190     if (!timer_list->active_timers) {
191         qemu_mutex_unlock(&timer_list->active_timers_lock);
192         return false;
193     }
194     expire_time = timer_list->active_timers->expire_time;
195     qemu_mutex_unlock(&timer_list->active_timers_lock);
196 
197     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198 }
199 
200 bool qemu_clock_expired(QEMUClockType type)
201 {
202     return timerlist_expired(
203         main_loop_tlg.tl[type]);
204 }
205 
206 /*
207  * As above, but return -1 for no deadline, and do not cap to 2^32
208  * as we know the result is always positive.
209  */
210 
211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212 {
213     int64_t delta;
214     int64_t expire_time;
215 
216     if (!atomic_read(&timer_list->active_timers)) {
217         return -1;
218     }
219 
220     if (!timer_list->clock->enabled) {
221         return -1;
222     }
223 
224     /* The active timers list may be modified before the caller uses our return
225      * value but ->notify_cb() is called when the deadline changes.  Therefore
226      * the caller should notice the change and there is no race condition.
227      */
228     qemu_mutex_lock(&timer_list->active_timers_lock);
229     if (!timer_list->active_timers) {
230         qemu_mutex_unlock(&timer_list->active_timers_lock);
231         return -1;
232     }
233     expire_time = timer_list->active_timers->expire_time;
234     qemu_mutex_unlock(&timer_list->active_timers_lock);
235 
236     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
237 
238     if (delta <= 0) {
239         return 0;
240     }
241 
242     return delta;
243 }
244 
245 /* Calculate the soonest deadline across all timerlists attached
246  * to the clock. This is used for the icount timeout so we
247  * ignore whether or not the clock should be used in deadline
248  * calculations.
249  */
250 int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
251 {
252     int64_t deadline = -1;
253     int64_t delta;
254     int64_t expire_time;
255     QEMUTimer *ts;
256     QEMUTimerList *timer_list;
257     QEMUClock *clock = qemu_clock_ptr(type);
258 
259     if (!clock->enabled) {
260         return -1;
261     }
262 
263     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
264         qemu_mutex_lock(&timer_list->active_timers_lock);
265         ts = timer_list->active_timers;
266         /* Skip all external timers */
267         while (ts && (ts->attributes & ~attr_mask)) {
268             ts = ts->next;
269         }
270         if (!ts) {
271             qemu_mutex_unlock(&timer_list->active_timers_lock);
272             continue;
273         }
274         expire_time = ts->expire_time;
275         qemu_mutex_unlock(&timer_list->active_timers_lock);
276 
277         delta = expire_time - qemu_clock_get_ns(type);
278         if (delta <= 0) {
279             delta = 0;
280         }
281         deadline = qemu_soonest_timeout(deadline, delta);
282     }
283     return deadline;
284 }
285 
286 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
287 {
288     return timer_list->clock->type;
289 }
290 
291 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
292 {
293     return main_loop_tlg.tl[type];
294 }
295 
296 void timerlist_notify(QEMUTimerList *timer_list)
297 {
298     if (timer_list->notify_cb) {
299         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
300     } else {
301         qemu_notify_event();
302     }
303 }
304 
305 /* Transition function to convert a nanosecond timeout to ms
306  * This is used where a system does not support ppoll
307  */
308 int qemu_timeout_ns_to_ms(int64_t ns)
309 {
310     int64_t ms;
311     if (ns < 0) {
312         return -1;
313     }
314 
315     if (!ns) {
316         return 0;
317     }
318 
319     /* Always round up, because it's better to wait too long than to wait too
320      * little and effectively busy-wait
321      */
322     ms = DIV_ROUND_UP(ns, SCALE_MS);
323 
324     /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
325     if (ms > (int64_t) INT32_MAX) {
326         ms = INT32_MAX;
327     }
328 
329     return (int) ms;
330 }
331 
332 
333 /* qemu implementation of g_poll which uses a nanosecond timeout but is
334  * otherwise identical to g_poll
335  */
336 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
337 {
338 #ifdef CONFIG_PPOLL
339     if (timeout < 0) {
340         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
341     } else {
342         struct timespec ts;
343         int64_t tvsec = timeout / 1000000000LL;
344         /* Avoid possibly overflowing and specifying a negative number of
345          * seconds, which would turn a very long timeout into a busy-wait.
346          */
347         if (tvsec > (int64_t)INT32_MAX) {
348             tvsec = INT32_MAX;
349         }
350         ts.tv_sec = tvsec;
351         ts.tv_nsec = timeout % 1000000000LL;
352         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
353     }
354 #else
355     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
356 #endif
357 }
358 
359 
360 void timer_init_full(QEMUTimer *ts,
361                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
362                      int scale, int attributes,
363                      QEMUTimerCB *cb, void *opaque)
364 {
365     if (!timer_list_group) {
366         timer_list_group = &main_loop_tlg;
367     }
368     ts->timer_list = timer_list_group->tl[type];
369     ts->cb = cb;
370     ts->opaque = opaque;
371     ts->scale = scale;
372     ts->attributes = attributes;
373     ts->expire_time = -1;
374 }
375 
376 void timer_deinit(QEMUTimer *ts)
377 {
378     assert(ts->expire_time == -1);
379     ts->timer_list = NULL;
380 }
381 
382 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
383 {
384     QEMUTimer **pt, *t;
385 
386     ts->expire_time = -1;
387     pt = &timer_list->active_timers;
388     for(;;) {
389         t = *pt;
390         if (!t)
391             break;
392         if (t == ts) {
393             atomic_set(pt, t->next);
394             break;
395         }
396         pt = &t->next;
397     }
398 }
399 
400 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
401                                 QEMUTimer *ts, int64_t expire_time)
402 {
403     QEMUTimer **pt, *t;
404 
405     /* add the timer in the sorted list */
406     pt = &timer_list->active_timers;
407     for (;;) {
408         t = *pt;
409         if (!timer_expired_ns(t, expire_time)) {
410             break;
411         }
412         pt = &t->next;
413     }
414     ts->expire_time = MAX(expire_time, 0);
415     ts->next = *pt;
416     atomic_set(pt, ts);
417 
418     return pt == &timer_list->active_timers;
419 }
420 
421 static void timerlist_rearm(QEMUTimerList *timer_list)
422 {
423     /* Interrupt execution to force deadline recalculation.  */
424     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
425         qemu_start_warp_timer();
426     }
427     timerlist_notify(timer_list);
428 }
429 
430 /* stop a timer, but do not dealloc it */
431 void timer_del(QEMUTimer *ts)
432 {
433     QEMUTimerList *timer_list = ts->timer_list;
434 
435     if (timer_list) {
436         qemu_mutex_lock(&timer_list->active_timers_lock);
437         timer_del_locked(timer_list, ts);
438         qemu_mutex_unlock(&timer_list->active_timers_lock);
439     }
440 }
441 
442 /* modify the current timer so that it will be fired when current_time
443    >= expire_time. The corresponding callback will be called. */
444 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
445 {
446     QEMUTimerList *timer_list = ts->timer_list;
447     bool rearm;
448 
449     qemu_mutex_lock(&timer_list->active_timers_lock);
450     timer_del_locked(timer_list, ts);
451     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
452     qemu_mutex_unlock(&timer_list->active_timers_lock);
453 
454     if (rearm) {
455         timerlist_rearm(timer_list);
456     }
457 }
458 
459 /* modify the current timer so that it will be fired when current_time
460    >= expire_time or the current deadline, whichever comes earlier.
461    The corresponding callback will be called. */
462 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
463 {
464     QEMUTimerList *timer_list = ts->timer_list;
465     bool rearm;
466 
467     qemu_mutex_lock(&timer_list->active_timers_lock);
468     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
469         if (ts->expire_time != -1) {
470             timer_del_locked(timer_list, ts);
471         }
472         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
473     } else {
474         rearm = false;
475     }
476     qemu_mutex_unlock(&timer_list->active_timers_lock);
477 
478     if (rearm) {
479         timerlist_rearm(timer_list);
480     }
481 }
482 
483 void timer_mod(QEMUTimer *ts, int64_t expire_time)
484 {
485     timer_mod_ns(ts, expire_time * ts->scale);
486 }
487 
488 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
489 {
490     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
491 }
492 
493 bool timer_pending(QEMUTimer *ts)
494 {
495     return ts->expire_time >= 0;
496 }
497 
498 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
499 {
500     return timer_expired_ns(timer_head, current_time * timer_head->scale);
501 }
502 
503 bool timerlist_run_timers(QEMUTimerList *timer_list)
504 {
505     QEMUTimer *ts;
506     int64_t current_time;
507     bool progress = false;
508     QEMUTimerCB *cb;
509     void *opaque;
510     bool need_replay_checkpoint = false;
511 
512     if (!atomic_read(&timer_list->active_timers)) {
513         return false;
514     }
515 
516     qemu_event_reset(&timer_list->timers_done_ev);
517     if (!timer_list->clock->enabled) {
518         goto out;
519     }
520 
521     switch (timer_list->clock->type) {
522     case QEMU_CLOCK_REALTIME:
523         break;
524     default:
525     case QEMU_CLOCK_VIRTUAL:
526         if (replay_mode != REPLAY_MODE_NONE) {
527             /* Checkpoint for virtual clock is redundant in cases where
528              * it's being triggered with only non-EXTERNAL timers, because
529              * these timers don't change guest state directly.
530              * Since it has conditional dependence on specific timers, it is
531              * subject to race conditions and requires special handling.
532              * See below.
533              */
534             need_replay_checkpoint = true;
535         }
536         break;
537     case QEMU_CLOCK_HOST:
538         if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
539             goto out;
540         }
541         break;
542     case QEMU_CLOCK_VIRTUAL_RT:
543         if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
544             goto out;
545         }
546         break;
547     }
548 
549     /*
550      * Extract expired timers from active timers list and and process them.
551      *
552      * In rr mode we need "filtered" checkpointing for virtual clock.  The
553      * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
554      * and that must only be done once since the clock value stays the same. Because
555      * non-EXTERNAL timers may appear in the timers list while it being processed,
556      * the checkpoint can be issued at a time until no timers are left and we are
557      * done".
558      */
559     current_time = qemu_clock_get_ns(timer_list->clock->type);
560     qemu_mutex_lock(&timer_list->active_timers_lock);
561     while ((ts = timer_list->active_timers)) {
562         if (!timer_expired_ns(ts, current_time)) {
563             /* No expired timers left.  The checkpoint can be skipped
564              * if no timers fired or they were all external.
565              */
566             break;
567         }
568         if (need_replay_checkpoint
569                 && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
570             /* once we got here, checkpoint clock only once */
571             need_replay_checkpoint = false;
572             qemu_mutex_unlock(&timer_list->active_timers_lock);
573             if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
574                 goto out;
575             }
576             qemu_mutex_lock(&timer_list->active_timers_lock);
577             /* The lock was released; start over again in case the list was
578              * modified.
579              */
580             continue;
581         }
582 
583         /* remove timer from the list before calling the callback */
584         timer_list->active_timers = ts->next;
585         ts->next = NULL;
586         ts->expire_time = -1;
587         cb = ts->cb;
588         opaque = ts->opaque;
589 
590         /* run the callback (the timer list can be modified) */
591         qemu_mutex_unlock(&timer_list->active_timers_lock);
592         cb(opaque);
593         qemu_mutex_lock(&timer_list->active_timers_lock);
594 
595         progress = true;
596     }
597     qemu_mutex_unlock(&timer_list->active_timers_lock);
598 
599 out:
600     qemu_event_set(&timer_list->timers_done_ev);
601     return progress;
602 }
603 
604 bool qemu_clock_run_timers(QEMUClockType type)
605 {
606     return timerlist_run_timers(main_loop_tlg.tl[type]);
607 }
608 
609 void timerlistgroup_init(QEMUTimerListGroup *tlg,
610                          QEMUTimerListNotifyCB *cb, void *opaque)
611 {
612     QEMUClockType type;
613     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
614         tlg->tl[type] = timerlist_new(type, cb, opaque);
615     }
616 }
617 
618 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
619 {
620     QEMUClockType type;
621     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
622         timerlist_free(tlg->tl[type]);
623     }
624 }
625 
626 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
627 {
628     QEMUClockType type;
629     bool progress = false;
630     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
631         progress |= timerlist_run_timers(tlg->tl[type]);
632     }
633     return progress;
634 }
635 
636 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
637 {
638     int64_t deadline = -1;
639     QEMUClockType type;
640     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
641         if (qemu_clock_use_for_deadline(type)) {
642             deadline = qemu_soonest_timeout(deadline,
643                                             timerlist_deadline_ns(tlg->tl[type]));
644         }
645     }
646     return deadline;
647 }
648 
649 int64_t qemu_clock_get_ns(QEMUClockType type)
650 {
651     switch (type) {
652     case QEMU_CLOCK_REALTIME:
653         return get_clock();
654     default:
655     case QEMU_CLOCK_VIRTUAL:
656         if (use_icount) {
657             return cpu_get_icount();
658         } else {
659             return cpu_get_clock();
660         }
661     case QEMU_CLOCK_HOST:
662         return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
663     case QEMU_CLOCK_VIRTUAL_RT:
664         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
665     }
666 }
667 
668 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
669 {
670     QEMUClockType type;
671     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
672         qemu_clock_init(type, notify_cb);
673     }
674 
675 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
676     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
677 #endif
678 }
679 
680 uint64_t timer_expire_time_ns(QEMUTimer *ts)
681 {
682     return timer_pending(ts) ? ts->expire_time : -1;
683 }
684 
685 bool qemu_clock_run_all_timers(void)
686 {
687     bool progress = false;
688     QEMUClockType type;
689 
690     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
691         if (qemu_clock_use_for_deadline(type)) {
692             progress |= qemu_clock_run_timers(type);
693         }
694     }
695 
696     return progress;
697 }
698