xref: /openbmc/qemu/util/qemu-timer.c (revision 4b9fa0b4)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "sysemu/replay.h"
29 #include "sysemu/cpus.h"
30 
31 #ifdef CONFIG_POSIX
32 #include <pthread.h>
33 #endif
34 
35 #ifdef CONFIG_PPOLL
36 #include <poll.h>
37 #endif
38 
39 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
40 #include <sys/prctl.h>
41 #endif
42 
43 /***********************************************************/
44 /* timers */
45 
/* Per-clock-type state: one instance exists for each QEMUClockType. */
typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    /* Clock type this entry describes (set by qemu_clock_init()). */
    QEMUClockType type;
    /* While false, timer lists on this clock do not run their timers. */
    bool enabled;
} QEMUClock;
53 
/* Default timer list group: timer_init_full() attaches a timer here when
 * the caller does not supply a group of its own.
 */
QEMUTimerListGroup main_loop_tlg;
/* Clock state, indexed by QEMUClockType (see qemu_clock_ptr()). */
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
56 
/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, in order that
 * reenabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    /* Clock this list is attached to. */
    QEMUClock *clock;
    /* Serialises modifications of active_timers. */
    QemuMutex active_timers_lock;
    /* Head of the pending timers, kept sorted by expire_time. */
    QEMUTimer *active_timers;
    /* Linkage in clock->timerlists. */
    QLIST_ENTRY(QEMUTimerList) list;
    /* Optional callback used by timerlist_notify(), with its argument. */
    QEMUTimerListNotifyCB *notify_cb;
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};
75 
76 /**
77  * qemu_clock_ptr:
78  * @type: type of clock
79  *
80  * Translate a clock type into a pointer to QEMUClock object.
81  *
82  * Returns: a pointer to the QEMUClock object
83  */
84 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
85 {
86     return &qemu_clocks[type];
87 }
88 
89 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
90 {
91     return timer_head && (timer_head->expire_time <= current_time);
92 }
93 
94 QEMUTimerList *timerlist_new(QEMUClockType type,
95                              QEMUTimerListNotifyCB *cb,
96                              void *opaque)
97 {
98     QEMUTimerList *timer_list;
99     QEMUClock *clock = qemu_clock_ptr(type);
100 
101     timer_list = g_malloc0(sizeof(QEMUTimerList));
102     qemu_event_init(&timer_list->timers_done_ev, true);
103     timer_list->clock = clock;
104     timer_list->notify_cb = cb;
105     timer_list->notify_opaque = opaque;
106     qemu_mutex_init(&timer_list->active_timers_lock);
107     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
108     return timer_list;
109 }
110 
111 void timerlist_free(QEMUTimerList *timer_list)
112 {
113     assert(!timerlist_has_timers(timer_list));
114     if (timer_list->clock) {
115         QLIST_REMOVE(timer_list, list);
116     }
117     qemu_mutex_destroy(&timer_list->active_timers_lock);
118     g_free(timer_list);
119 }
120 
121 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
122 {
123     QEMUClock *clock = qemu_clock_ptr(type);
124 
125     /* Assert that the clock of type TYPE has not been initialized yet. */
126     assert(main_loop_tlg.tl[type] == NULL);
127 
128     clock->type = type;
129     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
130     QLIST_INIT(&clock->timerlists);
131     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
132 }
133 
134 bool qemu_clock_use_for_deadline(QEMUClockType type)
135 {
136     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
137 }
138 
139 void qemu_clock_notify(QEMUClockType type)
140 {
141     QEMUTimerList *timer_list;
142     QEMUClock *clock = qemu_clock_ptr(type);
143     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
144         timerlist_notify(timer_list);
145     }
146 }
147 
148 /* Disabling the clock will wait for related timerlists to stop
149  * executing qemu_run_timers.  Thus, this functions should not
150  * be used from the callback of a timer that is based on @clock.
151  * Doing so would cause a deadlock.
152  *
153  * Caller should hold BQL.
154  */
155 void qemu_clock_enable(QEMUClockType type, bool enabled)
156 {
157     QEMUClock *clock = qemu_clock_ptr(type);
158     QEMUTimerList *tl;
159     bool old = clock->enabled;
160     clock->enabled = enabled;
161     if (enabled && !old) {
162         qemu_clock_notify(type);
163     } else if (!enabled && old) {
164         QLIST_FOREACH(tl, &clock->timerlists, list) {
165             qemu_event_wait(&tl->timers_done_ev);
166         }
167     }
168 }
169 
170 bool timerlist_has_timers(QEMUTimerList *timer_list)
171 {
172     return !!atomic_read(&timer_list->active_timers);
173 }
174 
175 bool qemu_clock_has_timers(QEMUClockType type)
176 {
177     return timerlist_has_timers(
178         main_loop_tlg.tl[type]);
179 }
180 
181 bool timerlist_expired(QEMUTimerList *timer_list)
182 {
183     int64_t expire_time;
184 
185     if (!atomic_read(&timer_list->active_timers)) {
186         return false;
187     }
188 
189     qemu_mutex_lock(&timer_list->active_timers_lock);
190     if (!timer_list->active_timers) {
191         qemu_mutex_unlock(&timer_list->active_timers_lock);
192         return false;
193     }
194     expire_time = timer_list->active_timers->expire_time;
195     qemu_mutex_unlock(&timer_list->active_timers_lock);
196 
197     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198 }
199 
200 bool qemu_clock_expired(QEMUClockType type)
201 {
202     return timerlist_expired(
203         main_loop_tlg.tl[type]);
204 }
205 
206 /*
207  * As above, but return -1 for no deadline, and do not cap to 2^32
208  * as we know the result is always positive.
209  */
210 
211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212 {
213     int64_t delta;
214     int64_t expire_time;
215 
216     if (!atomic_read(&timer_list->active_timers)) {
217         return -1;
218     }
219 
220     if (!timer_list->clock->enabled) {
221         return -1;
222     }
223 
224     /* The active timers list may be modified before the caller uses our return
225      * value but ->notify_cb() is called when the deadline changes.  Therefore
226      * the caller should notice the change and there is no race condition.
227      */
228     qemu_mutex_lock(&timer_list->active_timers_lock);
229     if (!timer_list->active_timers) {
230         qemu_mutex_unlock(&timer_list->active_timers_lock);
231         return -1;
232     }
233     expire_time = timer_list->active_timers->expire_time;
234     qemu_mutex_unlock(&timer_list->active_timers_lock);
235 
236     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
237 
238     if (delta <= 0) {
239         return 0;
240     }
241 
242     return delta;
243 }
244 
245 /* Calculate the soonest deadline across all timerlists attached
246  * to the clock. This is used for the icount timeout so we
247  * ignore whether or not the clock should be used in deadline
248  * calculations.
249  */
250 int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
251 {
252     int64_t deadline = -1;
253     int64_t delta;
254     int64_t expire_time;
255     QEMUTimer *ts;
256     QEMUTimerList *timer_list;
257     QEMUClock *clock = qemu_clock_ptr(type);
258 
259     if (!clock->enabled) {
260         return -1;
261     }
262 
263     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
264         qemu_mutex_lock(&timer_list->active_timers_lock);
265         ts = timer_list->active_timers;
266         /* Skip all external timers */
267         while (ts && (ts->attributes & ~attr_mask)) {
268             ts = ts->next;
269         }
270         if (!ts) {
271             qemu_mutex_unlock(&timer_list->active_timers_lock);
272             continue;
273         }
274         expire_time = ts->expire_time;
275         qemu_mutex_unlock(&timer_list->active_timers_lock);
276 
277         delta = expire_time - qemu_clock_get_ns(type);
278         if (delta <= 0) {
279             delta = 0;
280         }
281         deadline = qemu_soonest_timeout(deadline, delta);
282     }
283     return deadline;
284 }
285 
286 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
287 {
288     return timer_list->clock->type;
289 }
290 
291 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
292 {
293     return main_loop_tlg.tl[type];
294 }
295 
296 void timerlist_notify(QEMUTimerList *timer_list)
297 {
298     if (timer_list->notify_cb) {
299         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
300     } else {
301         qemu_notify_event();
302     }
303 }
304 
305 /* Transition function to convert a nanosecond timeout to ms
306  * This is used where a system does not support ppoll
307  */
308 int qemu_timeout_ns_to_ms(int64_t ns)
309 {
310     int64_t ms;
311     if (ns < 0) {
312         return -1;
313     }
314 
315     if (!ns) {
316         return 0;
317     }
318 
319     /* Always round up, because it's better to wait too long than to wait too
320      * little and effectively busy-wait
321      */
322     ms = DIV_ROUND_UP(ns, SCALE_MS);
323 
324     /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
325     return MIN(ms, INT32_MAX);
326 }
327 
328 
329 /* qemu implementation of g_poll which uses a nanosecond timeout but is
330  * otherwise identical to g_poll
331  */
332 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
333 {
334 #ifdef CONFIG_PPOLL
335     if (timeout < 0) {
336         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
337     } else {
338         struct timespec ts;
339         int64_t tvsec = timeout / 1000000000LL;
340         /* Avoid possibly overflowing and specifying a negative number of
341          * seconds, which would turn a very long timeout into a busy-wait.
342          */
343         if (tvsec > (int64_t)INT32_MAX) {
344             tvsec = INT32_MAX;
345         }
346         ts.tv_sec = tvsec;
347         ts.tv_nsec = timeout % 1000000000LL;
348         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
349     }
350 #else
351     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
352 #endif
353 }
354 
355 
356 void timer_init_full(QEMUTimer *ts,
357                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
358                      int scale, int attributes,
359                      QEMUTimerCB *cb, void *opaque)
360 {
361     if (!timer_list_group) {
362         timer_list_group = &main_loop_tlg;
363     }
364     ts->timer_list = timer_list_group->tl[type];
365     ts->cb = cb;
366     ts->opaque = opaque;
367     ts->scale = scale;
368     ts->attributes = attributes;
369     ts->expire_time = -1;
370 }
371 
372 void timer_deinit(QEMUTimer *ts)
373 {
374     assert(ts->expire_time == -1);
375     ts->timer_list = NULL;
376 }
377 
378 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
379 {
380     QEMUTimer **pt, *t;
381 
382     ts->expire_time = -1;
383     pt = &timer_list->active_timers;
384     for(;;) {
385         t = *pt;
386         if (!t)
387             break;
388         if (t == ts) {
389             atomic_set(pt, t->next);
390             break;
391         }
392         pt = &t->next;
393     }
394 }
395 
396 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
397                                 QEMUTimer *ts, int64_t expire_time)
398 {
399     QEMUTimer **pt, *t;
400 
401     /* add the timer in the sorted list */
402     pt = &timer_list->active_timers;
403     for (;;) {
404         t = *pt;
405         if (!timer_expired_ns(t, expire_time)) {
406             break;
407         }
408         pt = &t->next;
409     }
410     ts->expire_time = MAX(expire_time, 0);
411     ts->next = *pt;
412     atomic_set(pt, ts);
413 
414     return pt == &timer_list->active_timers;
415 }
416 
417 static void timerlist_rearm(QEMUTimerList *timer_list)
418 {
419     /* Interrupt execution to force deadline recalculation.  */
420     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
421         qemu_start_warp_timer();
422     }
423     timerlist_notify(timer_list);
424 }
425 
426 /* stop a timer, but do not dealloc it */
427 void timer_del(QEMUTimer *ts)
428 {
429     QEMUTimerList *timer_list = ts->timer_list;
430 
431     if (timer_list) {
432         qemu_mutex_lock(&timer_list->active_timers_lock);
433         timer_del_locked(timer_list, ts);
434         qemu_mutex_unlock(&timer_list->active_timers_lock);
435     }
436 }
437 
438 /* modify the current timer so that it will be fired when current_time
439    >= expire_time. The corresponding callback will be called. */
440 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
441 {
442     QEMUTimerList *timer_list = ts->timer_list;
443     bool rearm;
444 
445     qemu_mutex_lock(&timer_list->active_timers_lock);
446     timer_del_locked(timer_list, ts);
447     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
448     qemu_mutex_unlock(&timer_list->active_timers_lock);
449 
450     if (rearm) {
451         timerlist_rearm(timer_list);
452     }
453 }
454 
455 /* modify the current timer so that it will be fired when current_time
456    >= expire_time or the current deadline, whichever comes earlier.
457    The corresponding callback will be called. */
458 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
459 {
460     QEMUTimerList *timer_list = ts->timer_list;
461     bool rearm;
462 
463     qemu_mutex_lock(&timer_list->active_timers_lock);
464     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
465         if (ts->expire_time != -1) {
466             timer_del_locked(timer_list, ts);
467         }
468         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
469     } else {
470         rearm = false;
471     }
472     qemu_mutex_unlock(&timer_list->active_timers_lock);
473 
474     if (rearm) {
475         timerlist_rearm(timer_list);
476     }
477 }
478 
479 void timer_mod(QEMUTimer *ts, int64_t expire_time)
480 {
481     timer_mod_ns(ts, expire_time * ts->scale);
482 }
483 
484 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
485 {
486     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
487 }
488 
489 bool timer_pending(QEMUTimer *ts)
490 {
491     return ts->expire_time >= 0;
492 }
493 
494 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
495 {
496     return timer_expired_ns(timer_head, current_time * timer_head->scale);
497 }
498 
/* Run the callbacks of all expired timers on @timer_list.
 *
 * Returns true if at least one callback was invoked.  Nothing is run when
 * the list is empty, when its clock is disabled, or when a replay
 * checkpoint call returns false.
 */
bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;
    bool need_replay_checkpoint = false;

    /* Lock-free early exit for an empty list. */
    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    /* Mark the list as running; qemu_clock_enable() waits on this event
     * when the clock is being disabled.
     */
    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (replay_mode != REPLAY_MODE_NONE) {
            /* Checkpoint for virtual clock is redundant in cases where
             * it's being triggered with only non-EXTERNAL timers, because
             * these timers don't change guest state directly.
             * Since it has conditional dependence on specific timers, it is
             * subject to race conditions and requires special handling.
             * See below.
             */
            need_replay_checkpoint = true;
        }
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from the active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for virtual clock.  The
     * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
     * and that must only be done once since the clock value stays the same. Because
     * non-EXTERNAL timers may appear in the timers list while it is being processed,
     * the checkpoint can be issued at any time until no timers are left and we are
     * done.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left.  The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        if (need_replay_checkpoint
                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
            /* once we got here, checkpoint clock only once */
            need_replay_checkpoint = false;
            /* The checkpoint is issued without holding the list lock. */
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
                /* The lock is not held on this path, matching "out". */
                goto out;
            }
            qemu_mutex_lock(&timer_list->active_timers_lock);
            /* The lock was released; start over again in case the list was
             * modified.
             */
            continue;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        /* Copy the callback and its argument while still holding the lock. */
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    /* Signal that this list has finished running its timers. */
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}
599 
600 bool qemu_clock_run_timers(QEMUClockType type)
601 {
602     return timerlist_run_timers(main_loop_tlg.tl[type]);
603 }
604 
605 void timerlistgroup_init(QEMUTimerListGroup *tlg,
606                          QEMUTimerListNotifyCB *cb, void *opaque)
607 {
608     QEMUClockType type;
609     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
610         tlg->tl[type] = timerlist_new(type, cb, opaque);
611     }
612 }
613 
614 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
615 {
616     QEMUClockType type;
617     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
618         timerlist_free(tlg->tl[type]);
619     }
620 }
621 
622 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
623 {
624     QEMUClockType type;
625     bool progress = false;
626     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
627         progress |= timerlist_run_timers(tlg->tl[type]);
628     }
629     return progress;
630 }
631 
632 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
633 {
634     int64_t deadline = -1;
635     QEMUClockType type;
636     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
637         if (qemu_clock_use_for_deadline(type)) {
638             deadline = qemu_soonest_timeout(deadline,
639                                             timerlist_deadline_ns(tlg->tl[type]));
640         }
641     }
642     return deadline;
643 }
644 
645 int64_t qemu_clock_get_ns(QEMUClockType type)
646 {
647     switch (type) {
648     case QEMU_CLOCK_REALTIME:
649         return get_clock();
650     default:
651     case QEMU_CLOCK_VIRTUAL:
652         if (use_icount) {
653             return cpu_get_icount();
654         } else {
655             return cpu_get_clock();
656         }
657     case QEMU_CLOCK_HOST:
658         return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
659     case QEMU_CLOCK_VIRTUAL_RT:
660         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
661     }
662 }
663 
664 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
665 {
666     QEMUClockType type;
667     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
668         qemu_clock_init(type, notify_cb);
669     }
670 
671 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
672     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
673 #endif
674 }
675 
676 uint64_t timer_expire_time_ns(QEMUTimer *ts)
677 {
678     return timer_pending(ts) ? ts->expire_time : -1;
679 }
680 
681 bool qemu_clock_run_all_timers(void)
682 {
683     bool progress = false;
684     QEMUClockType type;
685 
686     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
687         if (qemu_clock_use_for_deadline(type)) {
688             progress |= qemu_clock_run_timers(type);
689         }
690     }
691 
692     return progress;
693 }
694