xref: /openbmc/qemu/util/qemu-timer.c (revision 781c67ca)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "qemu/lockable.h"
29 #include "sysemu/replay.h"
30 #include "sysemu/cpus.h"
31 
32 #ifdef CONFIG_POSIX
33 #include <pthread.h>
34 #endif
35 
36 #ifdef CONFIG_PPOLL
37 #include <poll.h>
38 #endif
39 
40 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
41 #include <sys/prctl.h>
42 #endif
43 
44 /***********************************************************/
45 /* timers */
46 
47 typedef struct QEMUClock {
48     /* We rely on BQL to protect the timerlists */
49     QLIST_HEAD(, QEMUTimerList) timerlists;
50 
51     QEMUClockType type;
52     bool enabled;
53 } QEMUClock;
54 
55 QEMUTimerListGroup main_loop_tlg;
56 static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
57 
58 /* A QEMUTimerList is a list of timers attached to a clock. More
59  * than one QEMUTimerList can be attached to each clock, for instance
60  * used by different AioContexts / threads. Each clock also has
61  * a list of the QEMUTimerLists associated with it, in order that
62  * reenabling the clock can call all the notifiers.
63  */
64 
65 struct QEMUTimerList {
66     QEMUClock *clock;
67     QemuMutex active_timers_lock;
68     QEMUTimer *active_timers;
69     QLIST_ENTRY(QEMUTimerList) list;
70     QEMUTimerListNotifyCB *notify_cb;
71     void *notify_opaque;
72 
73     /* lightweight method to mark the end of timerlist's running */
74     QemuEvent timers_done_ev;
75 };
76 
77 /**
78  * qemu_clock_ptr:
79  * @type: type of clock
80  *
81  * Translate a clock type into a pointer to QEMUClock object.
82  *
83  * Returns: a pointer to the QEMUClock object
84  */
85 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
86 {
87     return &qemu_clocks[type];
88 }
89 
90 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
91 {
92     return timer_head && (timer_head->expire_time <= current_time);
93 }
94 
95 QEMUTimerList *timerlist_new(QEMUClockType type,
96                              QEMUTimerListNotifyCB *cb,
97                              void *opaque)
98 {
99     QEMUTimerList *timer_list;
100     QEMUClock *clock = qemu_clock_ptr(type);
101 
102     timer_list = g_malloc0(sizeof(QEMUTimerList));
103     qemu_event_init(&timer_list->timers_done_ev, true);
104     timer_list->clock = clock;
105     timer_list->notify_cb = cb;
106     timer_list->notify_opaque = opaque;
107     qemu_mutex_init(&timer_list->active_timers_lock);
108     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
109     return timer_list;
110 }
111 
112 void timerlist_free(QEMUTimerList *timer_list)
113 {
114     assert(!timerlist_has_timers(timer_list));
115     if (timer_list->clock) {
116         QLIST_REMOVE(timer_list, list);
117     }
118     qemu_mutex_destroy(&timer_list->active_timers_lock);
119     g_free(timer_list);
120 }
121 
122 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
123 {
124     QEMUClock *clock = qemu_clock_ptr(type);
125 
126     /* Assert that the clock of type TYPE has not been initialized yet. */
127     assert(main_loop_tlg.tl[type] == NULL);
128 
129     clock->type = type;
130     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
131     QLIST_INIT(&clock->timerlists);
132     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
133 }
134 
135 bool qemu_clock_use_for_deadline(QEMUClockType type)
136 {
137     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
138 }
139 
140 void qemu_clock_notify(QEMUClockType type)
141 {
142     QEMUTimerList *timer_list;
143     QEMUClock *clock = qemu_clock_ptr(type);
144     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
145         timerlist_notify(timer_list);
146     }
147 }
148 
149 /* Disabling the clock will wait for related timerlists to stop
150  * executing qemu_run_timers.  Thus, this functions should not
151  * be used from the callback of a timer that is based on @clock.
152  * Doing so would cause a deadlock.
153  *
154  * Caller should hold BQL.
155  */
156 void qemu_clock_enable(QEMUClockType type, bool enabled)
157 {
158     QEMUClock *clock = qemu_clock_ptr(type);
159     QEMUTimerList *tl;
160     bool old = clock->enabled;
161     clock->enabled = enabled;
162     if (enabled && !old) {
163         qemu_clock_notify(type);
164     } else if (!enabled && old) {
165         QLIST_FOREACH(tl, &clock->timerlists, list) {
166             qemu_event_wait(&tl->timers_done_ev);
167         }
168     }
169 }
170 
171 bool timerlist_has_timers(QEMUTimerList *timer_list)
172 {
173     return !!atomic_read(&timer_list->active_timers);
174 }
175 
176 bool qemu_clock_has_timers(QEMUClockType type)
177 {
178     return timerlist_has_timers(
179         main_loop_tlg.tl[type]);
180 }
181 
182 bool timerlist_expired(QEMUTimerList *timer_list)
183 {
184     int64_t expire_time;
185 
186     if (!atomic_read(&timer_list->active_timers)) {
187         return false;
188     }
189 
190     WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
191         if (!timer_list->active_timers) {
192             return false;
193         }
194         expire_time = timer_list->active_timers->expire_time;
195     }
196 
197     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198 }
199 
200 bool qemu_clock_expired(QEMUClockType type)
201 {
202     return timerlist_expired(
203         main_loop_tlg.tl[type]);
204 }
205 
206 /*
207  * As above, but return -1 for no deadline, and do not cap to 2^32
208  * as we know the result is always positive.
209  */
210 
211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212 {
213     int64_t delta;
214     int64_t expire_time;
215 
216     if (!atomic_read(&timer_list->active_timers)) {
217         return -1;
218     }
219 
220     if (!timer_list->clock->enabled) {
221         return -1;
222     }
223 
224     /* The active timers list may be modified before the caller uses our return
225      * value but ->notify_cb() is called when the deadline changes.  Therefore
226      * the caller should notice the change and there is no race condition.
227      */
228     WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
229         if (!timer_list->active_timers) {
230             return -1;
231         }
232         expire_time = timer_list->active_timers->expire_time;
233     }
234 
235     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
236 
237     if (delta <= 0) {
238         return 0;
239     }
240 
241     return delta;
242 }
243 
244 /* Calculate the soonest deadline across all timerlists attached
245  * to the clock. This is used for the icount timeout so we
246  * ignore whether or not the clock should be used in deadline
247  * calculations.
248  */
249 int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
250 {
251     int64_t deadline = -1;
252     int64_t delta;
253     int64_t expire_time;
254     QEMUTimer *ts;
255     QEMUTimerList *timer_list;
256     QEMUClock *clock = qemu_clock_ptr(type);
257 
258     if (!clock->enabled) {
259         return -1;
260     }
261 
262     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
263         qemu_mutex_lock(&timer_list->active_timers_lock);
264         ts = timer_list->active_timers;
265         /* Skip all external timers */
266         while (ts && (ts->attributes & ~attr_mask)) {
267             ts = ts->next;
268         }
269         if (!ts) {
270             qemu_mutex_unlock(&timer_list->active_timers_lock);
271             continue;
272         }
273         expire_time = ts->expire_time;
274         qemu_mutex_unlock(&timer_list->active_timers_lock);
275 
276         delta = expire_time - qemu_clock_get_ns(type);
277         if (delta <= 0) {
278             delta = 0;
279         }
280         deadline = qemu_soonest_timeout(deadline, delta);
281     }
282     return deadline;
283 }
284 
285 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
286 {
287     return timer_list->clock->type;
288 }
289 
290 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
291 {
292     return main_loop_tlg.tl[type];
293 }
294 
295 void timerlist_notify(QEMUTimerList *timer_list)
296 {
297     if (timer_list->notify_cb) {
298         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
299     } else {
300         qemu_notify_event();
301     }
302 }
303 
/* Transition function to convert a nanosecond timeout to ms
 * This is used where a system does not support ppoll
 *
 * Returns -1 for a negative @ns and 0 for @ns == 0; otherwise @ns
 * rounded up to whole milliseconds and capped to INT32_MAX.
 */
int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;

    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait.  Divide first and add the carry
     * afterwards: computing ns + SCALE_MS - 1 would overflow int64_t for
     * timeouts close to INT64_MAX.
     */
    ms = ns / SCALE_MS + (ns % SCALE_MS != 0);

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > INT32_MAX) {
        return INT32_MAX;
    }
    return (int)ms;
}
326 
327 
328 /* qemu implementation of g_poll which uses a nanosecond timeout but is
329  * otherwise identical to g_poll
330  */
331 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
332 {
333 #ifdef CONFIG_PPOLL
334     if (timeout < 0) {
335         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
336     } else {
337         struct timespec ts;
338         int64_t tvsec = timeout / 1000000000LL;
339         /* Avoid possibly overflowing and specifying a negative number of
340          * seconds, which would turn a very long timeout into a busy-wait.
341          */
342         if (tvsec > (int64_t)INT32_MAX) {
343             tvsec = INT32_MAX;
344         }
345         ts.tv_sec = tvsec;
346         ts.tv_nsec = timeout % 1000000000LL;
347         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
348     }
349 #else
350     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
351 #endif
352 }
353 
354 
355 void timer_init_full(QEMUTimer *ts,
356                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
357                      int scale, int attributes,
358                      QEMUTimerCB *cb, void *opaque)
359 {
360     if (!timer_list_group) {
361         timer_list_group = &main_loop_tlg;
362     }
363     ts->timer_list = timer_list_group->tl[type];
364     ts->cb = cb;
365     ts->opaque = opaque;
366     ts->scale = scale;
367     ts->attributes = attributes;
368     ts->expire_time = -1;
369 }
370 
371 void timer_deinit(QEMUTimer *ts)
372 {
373     assert(ts->expire_time == -1);
374     ts->timer_list = NULL;
375 }
376 
377 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
378 {
379     QEMUTimer **pt, *t;
380 
381     ts->expire_time = -1;
382     pt = &timer_list->active_timers;
383     for(;;) {
384         t = *pt;
385         if (!t)
386             break;
387         if (t == ts) {
388             atomic_set(pt, t->next);
389             break;
390         }
391         pt = &t->next;
392     }
393 }
394 
395 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
396                                 QEMUTimer *ts, int64_t expire_time)
397 {
398     QEMUTimer **pt, *t;
399 
400     /* add the timer in the sorted list */
401     pt = &timer_list->active_timers;
402     for (;;) {
403         t = *pt;
404         if (!timer_expired_ns(t, expire_time)) {
405             break;
406         }
407         pt = &t->next;
408     }
409     ts->expire_time = MAX(expire_time, 0);
410     ts->next = *pt;
411     atomic_set(pt, ts);
412 
413     return pt == &timer_list->active_timers;
414 }
415 
416 static void timerlist_rearm(QEMUTimerList *timer_list)
417 {
418     /* Interrupt execution to force deadline recalculation.  */
419     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
420         qemu_start_warp_timer();
421     }
422     timerlist_notify(timer_list);
423 }
424 
425 /* stop a timer, but do not dealloc it */
426 void timer_del(QEMUTimer *ts)
427 {
428     QEMUTimerList *timer_list = ts->timer_list;
429 
430     if (timer_list) {
431         qemu_mutex_lock(&timer_list->active_timers_lock);
432         timer_del_locked(timer_list, ts);
433         qemu_mutex_unlock(&timer_list->active_timers_lock);
434     }
435 }
436 
437 /* modify the current timer so that it will be fired when current_time
438    >= expire_time. The corresponding callback will be called. */
439 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
440 {
441     QEMUTimerList *timer_list = ts->timer_list;
442     bool rearm;
443 
444     qemu_mutex_lock(&timer_list->active_timers_lock);
445     timer_del_locked(timer_list, ts);
446     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
447     qemu_mutex_unlock(&timer_list->active_timers_lock);
448 
449     if (rearm) {
450         timerlist_rearm(timer_list);
451     }
452 }
453 
454 /* modify the current timer so that it will be fired when current_time
455    >= expire_time or the current deadline, whichever comes earlier.
456    The corresponding callback will be called. */
457 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
458 {
459     QEMUTimerList *timer_list = ts->timer_list;
460     bool rearm;
461 
462     qemu_mutex_lock(&timer_list->active_timers_lock);
463     if (ts->expire_time == -1 || ts->expire_time > expire_time) {
464         if (ts->expire_time != -1) {
465             timer_del_locked(timer_list, ts);
466         }
467         rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
468     } else {
469         rearm = false;
470     }
471     qemu_mutex_unlock(&timer_list->active_timers_lock);
472 
473     if (rearm) {
474         timerlist_rearm(timer_list);
475     }
476 }
477 
478 void timer_mod(QEMUTimer *ts, int64_t expire_time)
479 {
480     timer_mod_ns(ts, expire_time * ts->scale);
481 }
482 
483 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
484 {
485     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
486 }
487 
488 bool timer_pending(QEMUTimer *ts)
489 {
490     return ts->expire_time >= 0;
491 }
492 
493 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
494 {
495     return timer_expired_ns(timer_head, current_time * timer_head->scale);
496 }
497 
498 bool timerlist_run_timers(QEMUTimerList *timer_list)
499 {
500     QEMUTimer *ts;
501     int64_t current_time;
502     bool progress = false;
503     QEMUTimerCB *cb;
504     void *opaque;
505     bool need_replay_checkpoint = false;
506 
507     if (!atomic_read(&timer_list->active_timers)) {
508         return false;
509     }
510 
511     qemu_event_reset(&timer_list->timers_done_ev);
512     if (!timer_list->clock->enabled) {
513         goto out;
514     }
515 
516     switch (timer_list->clock->type) {
517     case QEMU_CLOCK_REALTIME:
518         break;
519     default:
520     case QEMU_CLOCK_VIRTUAL:
521         if (replay_mode != REPLAY_MODE_NONE) {
522             /* Checkpoint for virtual clock is redundant in cases where
523              * it's being triggered with only non-EXTERNAL timers, because
524              * these timers don't change guest state directly.
525              * Since it has conditional dependence on specific timers, it is
526              * subject to race conditions and requires special handling.
527              * See below.
528              */
529             need_replay_checkpoint = true;
530         }
531         break;
532     case QEMU_CLOCK_HOST:
533         if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
534             goto out;
535         }
536         break;
537     case QEMU_CLOCK_VIRTUAL_RT:
538         if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
539             goto out;
540         }
541         break;
542     }
543 
544     /*
545      * Extract expired timers from active timers list and and process them.
546      *
547      * In rr mode we need "filtered" checkpointing for virtual clock.  The
548      * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
549      * and that must only be done once since the clock value stays the same. Because
550      * non-EXTERNAL timers may appear in the timers list while it being processed,
551      * the checkpoint can be issued at a time until no timers are left and we are
552      * done".
553      */
554     current_time = qemu_clock_get_ns(timer_list->clock->type);
555     qemu_mutex_lock(&timer_list->active_timers_lock);
556     while ((ts = timer_list->active_timers)) {
557         if (!timer_expired_ns(ts, current_time)) {
558             /* No expired timers left.  The checkpoint can be skipped
559              * if no timers fired or they were all external.
560              */
561             break;
562         }
563         if (need_replay_checkpoint
564                 && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
565             /* once we got here, checkpoint clock only once */
566             need_replay_checkpoint = false;
567             qemu_mutex_unlock(&timer_list->active_timers_lock);
568             if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
569                 goto out;
570             }
571             qemu_mutex_lock(&timer_list->active_timers_lock);
572             /* The lock was released; start over again in case the list was
573              * modified.
574              */
575             continue;
576         }
577 
578         /* remove timer from the list before calling the callback */
579         timer_list->active_timers = ts->next;
580         ts->next = NULL;
581         ts->expire_time = -1;
582         cb = ts->cb;
583         opaque = ts->opaque;
584 
585         /* run the callback (the timer list can be modified) */
586         qemu_mutex_unlock(&timer_list->active_timers_lock);
587         cb(opaque);
588         qemu_mutex_lock(&timer_list->active_timers_lock);
589 
590         progress = true;
591     }
592     qemu_mutex_unlock(&timer_list->active_timers_lock);
593 
594 out:
595     qemu_event_set(&timer_list->timers_done_ev);
596     return progress;
597 }
598 
599 bool qemu_clock_run_timers(QEMUClockType type)
600 {
601     return timerlist_run_timers(main_loop_tlg.tl[type]);
602 }
603 
604 void timerlistgroup_init(QEMUTimerListGroup *tlg,
605                          QEMUTimerListNotifyCB *cb, void *opaque)
606 {
607     QEMUClockType type;
608     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
609         tlg->tl[type] = timerlist_new(type, cb, opaque);
610     }
611 }
612 
613 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
614 {
615     QEMUClockType type;
616     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
617         timerlist_free(tlg->tl[type]);
618     }
619 }
620 
621 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
622 {
623     QEMUClockType type;
624     bool progress = false;
625     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
626         progress |= timerlist_run_timers(tlg->tl[type]);
627     }
628     return progress;
629 }
630 
631 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
632 {
633     int64_t deadline = -1;
634     QEMUClockType type;
635     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
636         if (qemu_clock_use_for_deadline(type)) {
637             deadline = qemu_soonest_timeout(deadline,
638                                             timerlist_deadline_ns(tlg->tl[type]));
639         }
640     }
641     return deadline;
642 }
643 
644 int64_t qemu_clock_get_ns(QEMUClockType type)
645 {
646     switch (type) {
647     case QEMU_CLOCK_REALTIME:
648         return get_clock();
649     default:
650     case QEMU_CLOCK_VIRTUAL:
651         if (use_icount) {
652             return cpu_get_icount();
653         } else {
654             return cpu_get_clock();
655         }
656     case QEMU_CLOCK_HOST:
657         return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
658     case QEMU_CLOCK_VIRTUAL_RT:
659         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
660     }
661 }
662 
663 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
664 {
665     QEMUClockType type;
666     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
667         qemu_clock_init(type, notify_cb);
668     }
669 
670 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
671     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
672 #endif
673 }
674 
675 uint64_t timer_expire_time_ns(QEMUTimer *ts)
676 {
677     return timer_pending(ts) ? ts->expire_time : -1;
678 }
679 
680 bool qemu_clock_run_all_timers(void)
681 {
682     bool progress = false;
683     QEMUClockType type;
684 
685     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
686         if (qemu_clock_use_for_deadline(type)) {
687             progress |= qemu_clock_run_timers(type);
688         }
689     }
690 
691     return progress;
692 }
693