xref: /openbmc/qemu/util/qemu-timer.c (revision 245429e4a0e036471ba46e2393b6f33b78b9615e)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "sysemu/replay.h"
29 #include "sysemu/cpus.h"
30 
31 #ifdef CONFIG_POSIX
32 #include <pthread.h>
33 #endif
34 
35 #ifdef CONFIG_PPOLL
36 #include <poll.h>
37 #endif
38 
39 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
40 #include <sys/prctl.h>
41 #endif
42 
43 /***********************************************************/
44 /* timers */
45 
46 typedef struct QEMUClock {
47     /* We rely on BQL to protect the timerlists */
48     QLIST_HEAD(, QEMUTimerList) timerlists;
49 
50     QEMUClockType type;
51     bool enabled;
52 } QEMUClock;
53 
54 QEMUTimerListGroup main_loop_tlg;
55 static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
56 
57 /* A QEMUTimerList is a list of timers attached to a clock. More
58  * than one QEMUTimerList can be attached to each clock, for instance
59  * used by different AioContexts / threads. Each clock also has
60  * a list of the QEMUTimerLists associated with it, in order that
61  * reenabling the clock can call all the notifiers.
62  */
63 
64 struct QEMUTimerList {
65     QEMUClock *clock;
66     QemuMutex active_timers_lock;
67     QEMUTimer *active_timers;
68     QLIST_ENTRY(QEMUTimerList) list;
69     QEMUTimerListNotifyCB *notify_cb;
70     void *notify_opaque;
71 
72     /* lightweight method to mark the end of timerlist's running */
73     QemuEvent timers_done_ev;
74 };
75 
76 /**
77  * qemu_clock_ptr:
78  * @type: type of clock
79  *
80  * Translate a clock type into a pointer to QEMUClock object.
81  *
82  * Returns: a pointer to the QEMUClock object
83  */
84 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
85 {
86     return &qemu_clocks[type];
87 }
88 
89 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
90 {
91     return timer_head && (timer_head->expire_time <= current_time);
92 }
93 
94 QEMUTimerList *timerlist_new(QEMUClockType type,
95                              QEMUTimerListNotifyCB *cb,
96                              void *opaque)
97 {
98     QEMUTimerList *timer_list;
99     QEMUClock *clock = qemu_clock_ptr(type);
100 
101     timer_list = g_malloc0(sizeof(QEMUTimerList));
102     qemu_event_init(&timer_list->timers_done_ev, true);
103     timer_list->clock = clock;
104     timer_list->notify_cb = cb;
105     timer_list->notify_opaque = opaque;
106     qemu_mutex_init(&timer_list->active_timers_lock);
107     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
108     return timer_list;
109 }
110 
111 void timerlist_free(QEMUTimerList *timer_list)
112 {
113     assert(!timerlist_has_timers(timer_list));
114     if (timer_list->clock) {
115         QLIST_REMOVE(timer_list, list);
116     }
117     qemu_mutex_destroy(&timer_list->active_timers_lock);
118     g_free(timer_list);
119 }
120 
121 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
122 {
123     QEMUClock *clock = qemu_clock_ptr(type);
124 
125     /* Assert that the clock of type TYPE has not been initialized yet. */
126     assert(main_loop_tlg.tl[type] == NULL);
127 
128     clock->type = type;
129     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
130     QLIST_INIT(&clock->timerlists);
131     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
132 }
133 
134 bool qemu_clock_use_for_deadline(QEMUClockType type)
135 {
136     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
137 }
138 
139 void qemu_clock_notify(QEMUClockType type)
140 {
141     QEMUTimerList *timer_list;
142     QEMUClock *clock = qemu_clock_ptr(type);
143     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
144         timerlist_notify(timer_list);
145     }
146 }
147 
148 /* Disabling the clock will wait for related timerlists to stop
149  * executing qemu_run_timers.  Thus, this functions should not
150  * be used from the callback of a timer that is based on @clock.
151  * Doing so would cause a deadlock.
152  *
153  * Caller should hold BQL.
154  */
155 void qemu_clock_enable(QEMUClockType type, bool enabled)
156 {
157     QEMUClock *clock = qemu_clock_ptr(type);
158     QEMUTimerList *tl;
159     bool old = clock->enabled;
160     clock->enabled = enabled;
161     if (enabled && !old) {
162         qemu_clock_notify(type);
163     } else if (!enabled && old) {
164         QLIST_FOREACH(tl, &clock->timerlists, list) {
165             qemu_event_wait(&tl->timers_done_ev);
166         }
167     }
168 }
169 
170 bool timerlist_has_timers(QEMUTimerList *timer_list)
171 {
172     return !!atomic_read(&timer_list->active_timers);
173 }
174 
175 bool qemu_clock_has_timers(QEMUClockType type)
176 {
177     return timerlist_has_timers(
178         main_loop_tlg.tl[type]);
179 }
180 
181 bool timerlist_expired(QEMUTimerList *timer_list)
182 {
183     int64_t expire_time;
184 
185     if (!atomic_read(&timer_list->active_timers)) {
186         return false;
187     }
188 
189     qemu_mutex_lock(&timer_list->active_timers_lock);
190     if (!timer_list->active_timers) {
191         qemu_mutex_unlock(&timer_list->active_timers_lock);
192         return false;
193     }
194     expire_time = timer_list->active_timers->expire_time;
195     qemu_mutex_unlock(&timer_list->active_timers_lock);
196 
197     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198 }
199 
200 bool qemu_clock_expired(QEMUClockType type)
201 {
202     return timerlist_expired(
203         main_loop_tlg.tl[type]);
204 }
205 
206 /*
207  * As above, but return -1 for no deadline, and do not cap to 2^32
208  * as we know the result is always positive.
209  */
210 
211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212 {
213     int64_t delta;
214     int64_t expire_time;
215 
216     if (!atomic_read(&timer_list->active_timers)) {
217         return -1;
218     }
219 
220     if (!timer_list->clock->enabled) {
221         return -1;
222     }
223 
224     /* The active timers list may be modified before the caller uses our return
225      * value but ->notify_cb() is called when the deadline changes.  Therefore
226      * the caller should notice the change and there is no race condition.
227      */
228     qemu_mutex_lock(&timer_list->active_timers_lock);
229     if (!timer_list->active_timers) {
230         qemu_mutex_unlock(&timer_list->active_timers_lock);
231         return -1;
232     }
233     expire_time = timer_list->active_timers->expire_time;
234     qemu_mutex_unlock(&timer_list->active_timers_lock);
235 
236     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
237 
238     if (delta <= 0) {
239         return 0;
240     }
241 
242     return delta;
243 }
244 
245 /* Calculate the soonest deadline across all timerlists attached
246  * to the clock. This is used for the icount timeout so we
247  * ignore whether or not the clock should be used in deadline
248  * calculations.
249  */
250 int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
251 {
252     int64_t deadline = -1;
253     QEMUTimerList *timer_list;
254     QEMUClock *clock = qemu_clock_ptr(type);
255     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
256         deadline = qemu_soonest_timeout(deadline,
257                                         timerlist_deadline_ns(timer_list));
258     }
259     return deadline;
260 }
261 
262 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
263 {
264     return timer_list->clock->type;
265 }
266 
267 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
268 {
269     return main_loop_tlg.tl[type];
270 }
271 
272 void timerlist_notify(QEMUTimerList *timer_list)
273 {
274     if (timer_list->notify_cb) {
275         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
276     } else {
277         qemu_notify_event();
278     }
279 }
280 
281 /* Transition function to convert a nanosecond timeout to ms
282  * This is used where a system does not support ppoll
283  */
284 int qemu_timeout_ns_to_ms(int64_t ns)
285 {
286     int64_t ms;
287     if (ns < 0) {
288         return -1;
289     }
290 
291     if (!ns) {
292         return 0;
293     }
294 
295     /* Always round up, because it's better to wait too long than to wait too
296      * little and effectively busy-wait
297      */
298     ms = DIV_ROUND_UP(ns, SCALE_MS);
299 
300     /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
301     if (ms > (int64_t) INT32_MAX) {
302         ms = INT32_MAX;
303     }
304 
305     return (int) ms;
306 }
307 
308 
309 /* qemu implementation of g_poll which uses a nanosecond timeout but is
310  * otherwise identical to g_poll
311  */
312 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
313 {
314 #ifdef CONFIG_PPOLL
315     if (timeout < 0) {
316         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
317     } else {
318         struct timespec ts;
319         int64_t tvsec = timeout / 1000000000LL;
320         /* Avoid possibly overflowing and specifying a negative number of
321          * seconds, which would turn a very long timeout into a busy-wait.
322          */
323         if (tvsec > (int64_t)INT32_MAX) {
324             tvsec = INT32_MAX;
325         }
326         ts.tv_sec = tvsec;
327         ts.tv_nsec = timeout % 1000000000LL;
328         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
329     }
330 #else
331     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
332 #endif
333 }
334 
335 
336 void timer_init_full(QEMUTimer *ts,
337                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
338                      int scale, int attributes,
339                      QEMUTimerCB *cb, void *opaque)
340 {
341     if (!timer_list_group) {
342         timer_list_group = &main_loop_tlg;
343     }
344     ts->timer_list = timer_list_group->tl[type];
345     ts->cb = cb;
346     ts->opaque = opaque;
347     ts->scale = scale;
348     ts->attributes = attributes;
349     ts->expire_time = -1;
350 }
351 
352 void timer_deinit(QEMUTimer *ts)
353 {
354     assert(ts->expire_time == -1);
355     ts->timer_list = NULL;
356 }
357 
358 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
359 {
360     QEMUTimer **pt, *t;
361 
362     ts->expire_time = -1;
363     pt = &timer_list->active_timers;
364     for(;;) {
365         t = *pt;
366         if (!t)
367             break;
368         if (t == ts) {
369             atomic_set(pt, t->next);
370             break;
371         }
372         pt = &t->next;
373     }
374 }
375 
376 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
377                                 QEMUTimer *ts, int64_t expire_time)
378 {
379     QEMUTimer **pt, *t;
380 
381     /* add the timer in the sorted list */
382     pt = &timer_list->active_timers;
383     for (;;) {
384         t = *pt;
385         if (!timer_expired_ns(t, expire_time)) {
386             break;
387         }
388         pt = &t->next;
389     }
390     ts->expire_time = MAX(expire_time, 0);
391     ts->next = *pt;
392     atomic_set(pt, ts);
393 
394     return pt == &timer_list->active_timers;
395 }
396 
397 static void timerlist_rearm(QEMUTimerList *timer_list)
398 {
399     /* Interrupt execution to force deadline recalculation.  */
400     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
401         qemu_start_warp_timer();
402     }
403     timerlist_notify(timer_list);
404 }
405 
406 /* stop a timer, but do not dealloc it */
407 void timer_del(QEMUTimer *ts)
408 {
409     QEMUTimerList *timer_list = ts->timer_list;
410 
411     if (timer_list) {
412         qemu_mutex_lock(&timer_list->active_timers_lock);
413         timer_del_locked(timer_list, ts);
414         qemu_mutex_unlock(&timer_list->active_timers_lock);
415     }
416 }
417 
418 /* modify the current timer so that it will be fired when current_time
419    >= expire_time. The corresponding callback will be called. */
420 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
421 {
422     QEMUTimerList *timer_list = ts->timer_list;
423     bool rearm;
424 
425     qemu_mutex_lock(&timer_list->active_timers_lock);
426     timer_del_locked(timer_list, ts);
427     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
428     qemu_mutex_unlock(&timer_list->active_timers_lock);
429 
430     if (rearm) {
431         timerlist_rearm(timer_list);
432     }
433 }
434 
435 /* modify the current timer so that it will be fired when current_time
436    >= expire_time or the current deadline, whichever comes earlier.
437    The corresponding callback will be called. */
438 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
439 {
440     QEMUTimerList *timer_list = ts->timer_list;
441     bool rearm;
442 
443     qemu_mutex_lock(&timer_list->active_timers_lock);
444     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
445         if (ts->expire_time != -1) {
446             timer_del_locked(timer_list, ts);
447         }
448         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
449     } else {
450         rearm = false;
451     }
452     qemu_mutex_unlock(&timer_list->active_timers_lock);
453 
454     if (rearm) {
455         timerlist_rearm(timer_list);
456     }
457 }
458 
459 void timer_mod(QEMUTimer *ts, int64_t expire_time)
460 {
461     timer_mod_ns(ts, expire_time * ts->scale);
462 }
463 
464 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
465 {
466     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
467 }
468 
469 bool timer_pending(QEMUTimer *ts)
470 {
471     return ts->expire_time >= 0;
472 }
473 
474 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
475 {
476     return timer_expired_ns(timer_head, current_time * timer_head->scale);
477 }
478 
479 bool timerlist_run_timers(QEMUTimerList *timer_list)
480 {
481     QEMUTimer *ts;
482     int64_t current_time;
483     bool progress = false;
484     QEMUTimerCB *cb;
485     void *opaque;
486     bool need_replay_checkpoint = false;
487 
488     if (!atomic_read(&timer_list->active_timers)) {
489         return false;
490     }
491 
492     qemu_event_reset(&timer_list->timers_done_ev);
493     if (!timer_list->clock->enabled) {
494         goto out;
495     }
496 
497     switch (timer_list->clock->type) {
498     case QEMU_CLOCK_REALTIME:
499         break;
500     default:
501     case QEMU_CLOCK_VIRTUAL:
502         if (replay_mode != REPLAY_MODE_NONE) {
503             /* Checkpoint for virtual clock is redundant in cases where
504              * it's being triggered with only non-EXTERNAL timers, because
505              * these timers don't change guest state directly.
506              * Since it has conditional dependence on specific timers, it is
507              * subject to race conditions and requires special handling.
508              * See below.
509              */
510             need_replay_checkpoint = true;
511         }
512         break;
513     case QEMU_CLOCK_HOST:
514         if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
515             goto out;
516         }
517         break;
518     case QEMU_CLOCK_VIRTUAL_RT:
519         if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
520             goto out;
521         }
522         break;
523     }
524 
525     /*
526      * Extract expired timers from active timers list and and process them.
527      *
528      * In rr mode we need "filtered" checkpointing for virtual clock.  The
529      * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
530      * and that must only be done once since the clock value stays the same. Because
531      * non-EXTERNAL timers may appear in the timers list while it being processed,
532      * the checkpoint can be issued at a time until no timers are left and we are
533      * done".
534      */
535     current_time = qemu_clock_get_ns(timer_list->clock->type);
536     qemu_mutex_lock(&timer_list->active_timers_lock);
537     while ((ts = timer_list->active_timers)) {
538         if (!timer_expired_ns(ts, current_time)) {
539             /* No expired timers left.  The checkpoint can be skipped
540              * if no timers fired or they were all external.
541              */
542             break;
543         }
544         if (need_replay_checkpoint
545                 && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
546             /* once we got here, checkpoint clock only once */
547             need_replay_checkpoint = false;
548             qemu_mutex_unlock(&timer_list->active_timers_lock);
549             if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
550                 goto out;
551             }
552             qemu_mutex_lock(&timer_list->active_timers_lock);
553             /* The lock was released; start over again in case the list was
554              * modified.
555              */
556             continue;
557         }
558 
559         /* remove timer from the list before calling the callback */
560         timer_list->active_timers = ts->next;
561         ts->next = NULL;
562         ts->expire_time = -1;
563         cb = ts->cb;
564         opaque = ts->opaque;
565 
566         /* run the callback (the timer list can be modified) */
567         qemu_mutex_unlock(&timer_list->active_timers_lock);
568         cb(opaque);
569         qemu_mutex_lock(&timer_list->active_timers_lock);
570 
571         progress = true;
572     }
573     qemu_mutex_unlock(&timer_list->active_timers_lock);
574 
575 out:
576     qemu_event_set(&timer_list->timers_done_ev);
577     return progress;
578 }
579 
580 bool qemu_clock_run_timers(QEMUClockType type)
581 {
582     return timerlist_run_timers(main_loop_tlg.tl[type]);
583 }
584 
585 void timerlistgroup_init(QEMUTimerListGroup *tlg,
586                          QEMUTimerListNotifyCB *cb, void *opaque)
587 {
588     QEMUClockType type;
589     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
590         tlg->tl[type] = timerlist_new(type, cb, opaque);
591     }
592 }
593 
594 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
595 {
596     QEMUClockType type;
597     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
598         timerlist_free(tlg->tl[type]);
599     }
600 }
601 
602 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
603 {
604     QEMUClockType type;
605     bool progress = false;
606     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
607         progress |= timerlist_run_timers(tlg->tl[type]);
608     }
609     return progress;
610 }
611 
612 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
613 {
614     int64_t deadline = -1;
615     QEMUClockType type;
616     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
617         if (qemu_clock_use_for_deadline(type)) {
618             deadline = qemu_soonest_timeout(deadline,
619                                             timerlist_deadline_ns(tlg->tl[type]));
620         }
621     }
622     return deadline;
623 }
624 
625 int64_t qemu_clock_get_ns(QEMUClockType type)
626 {
627     switch (type) {
628     case QEMU_CLOCK_REALTIME:
629         return get_clock();
630     default:
631     case QEMU_CLOCK_VIRTUAL:
632         if (use_icount) {
633             return cpu_get_icount();
634         } else {
635             return cpu_get_clock();
636         }
637     case QEMU_CLOCK_HOST:
638         return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
639     case QEMU_CLOCK_VIRTUAL_RT:
640         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
641     }
642 }
643 
644 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
645 {
646     QEMUClockType type;
647     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
648         qemu_clock_init(type, notify_cb);
649     }
650 
651 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
652     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
653 #endif
654 }
655 
656 uint64_t timer_expire_time_ns(QEMUTimer *ts)
657 {
658     return timer_pending(ts) ? ts->expire_time : -1;
659 }
660 
661 bool qemu_clock_run_all_timers(void)
662 {
663     bool progress = false;
664     QEMUClockType type;
665 
666     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
667         if (qemu_clock_use_for_deadline(type)) {
668             progress |= qemu_clock_run_timers(type);
669         }
670     }
671 
672     return progress;
673 }
674