xref: /openbmc/qemu/util/qemu-timer.c (revision 2fc979cb)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/timer.h"
28 #include "qemu/lockable.h"
29 #include "sysemu/replay.h"
30 #include "sysemu/cpus.h"
31 
32 #ifdef CONFIG_POSIX
33 #include <pthread.h>
34 #endif
35 
36 #ifdef CONFIG_PPOLL
37 #include <poll.h>
38 #endif
39 
40 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
41 #include <sys/prctl.h>
42 #endif
43 
44 /***********************************************************/
45 /* timers */
46 
47 typedef struct QEMUClock {
48     /* We rely on BQL to protect the timerlists */
49     QLIST_HEAD(, QEMUTimerList) timerlists;
50 
51     QEMUClockType type;
52     bool enabled;
53 } QEMUClock;
54 
55 QEMUTimerListGroup main_loop_tlg;
56 static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
57 
58 /* A QEMUTimerList is a list of timers attached to a clock. More
59  * than one QEMUTimerList can be attached to each clock, for instance
60  * used by different AioContexts / threads. Each clock also has
61  * a list of the QEMUTimerLists associated with it, in order that
62  * reenabling the clock can call all the notifiers.
63  */
64 
65 struct QEMUTimerList {
66     QEMUClock *clock;
67     QemuMutex active_timers_lock;
68     QEMUTimer *active_timers;
69     QLIST_ENTRY(QEMUTimerList) list;
70     QEMUTimerListNotifyCB *notify_cb;
71     void *notify_opaque;
72 
73     /* lightweight method to mark the end of timerlist's running */
74     QemuEvent timers_done_ev;
75 };
76 
77 /**
78  * qemu_clock_ptr:
79  * @type: type of clock
80  *
81  * Translate a clock type into a pointer to QEMUClock object.
82  *
83  * Returns: a pointer to the QEMUClock object
84  */
85 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
86 {
87     return &qemu_clocks[type];
88 }
89 
90 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
91 {
92     return timer_head && (timer_head->expire_time <= current_time);
93 }
94 
95 QEMUTimerList *timerlist_new(QEMUClockType type,
96                              QEMUTimerListNotifyCB *cb,
97                              void *opaque)
98 {
99     QEMUTimerList *timer_list;
100     QEMUClock *clock = qemu_clock_ptr(type);
101 
102     timer_list = g_malloc0(sizeof(QEMUTimerList));
103     qemu_event_init(&timer_list->timers_done_ev, true);
104     timer_list->clock = clock;
105     timer_list->notify_cb = cb;
106     timer_list->notify_opaque = opaque;
107     qemu_mutex_init(&timer_list->active_timers_lock);
108     QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
109     return timer_list;
110 }
111 
112 void timerlist_free(QEMUTimerList *timer_list)
113 {
114     assert(!timerlist_has_timers(timer_list));
115     if (timer_list->clock) {
116         QLIST_REMOVE(timer_list, list);
117     }
118     qemu_mutex_destroy(&timer_list->active_timers_lock);
119     g_free(timer_list);
120 }
121 
122 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
123 {
124     QEMUClock *clock = qemu_clock_ptr(type);
125 
126     /* Assert that the clock of type TYPE has not been initialized yet. */
127     assert(main_loop_tlg.tl[type] == NULL);
128 
129     clock->type = type;
130     clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
131     QLIST_INIT(&clock->timerlists);
132     main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
133 }
134 
135 bool qemu_clock_use_for_deadline(QEMUClockType type)
136 {
137     return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
138 }
139 
140 void qemu_clock_notify(QEMUClockType type)
141 {
142     QEMUTimerList *timer_list;
143     QEMUClock *clock = qemu_clock_ptr(type);
144     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
145         timerlist_notify(timer_list);
146     }
147 }
148 
149 /* Disabling the clock will wait for related timerlists to stop
150  * executing qemu_run_timers.  Thus, this functions should not
151  * be used from the callback of a timer that is based on @clock.
152  * Doing so would cause a deadlock.
153  *
154  * Caller should hold BQL.
155  */
156 void qemu_clock_enable(QEMUClockType type, bool enabled)
157 {
158     QEMUClock *clock = qemu_clock_ptr(type);
159     QEMUTimerList *tl;
160     bool old = clock->enabled;
161     clock->enabled = enabled;
162     if (enabled && !old) {
163         qemu_clock_notify(type);
164     } else if (!enabled && old) {
165         QLIST_FOREACH(tl, &clock->timerlists, list) {
166             qemu_event_wait(&tl->timers_done_ev);
167         }
168     }
169 }
170 
171 bool timerlist_has_timers(QEMUTimerList *timer_list)
172 {
173     return !!atomic_read(&timer_list->active_timers);
174 }
175 
176 bool qemu_clock_has_timers(QEMUClockType type)
177 {
178     return timerlist_has_timers(
179         main_loop_tlg.tl[type]);
180 }
181 
182 bool timerlist_expired(QEMUTimerList *timer_list)
183 {
184     int64_t expire_time;
185 
186     if (!atomic_read(&timer_list->active_timers)) {
187         return false;
188     }
189 
190     WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
191         if (!timer_list->active_timers) {
192             return false;
193         }
194         expire_time = timer_list->active_timers->expire_time;
195     }
196 
197     return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198 }
199 
200 bool qemu_clock_expired(QEMUClockType type)
201 {
202     return timerlist_expired(
203         main_loop_tlg.tl[type]);
204 }
205 
206 /*
207  * As above, but return -1 for no deadline, and do not cap to 2^32
208  * as we know the result is always positive.
209  */
210 
211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212 {
213     int64_t delta;
214     int64_t expire_time;
215 
216     if (!atomic_read(&timer_list->active_timers)) {
217         return -1;
218     }
219 
220     if (!timer_list->clock->enabled) {
221         return -1;
222     }
223 
224     /* The active timers list may be modified before the caller uses our return
225      * value but ->notify_cb() is called when the deadline changes.  Therefore
226      * the caller should notice the change and there is no race condition.
227      */
228     WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
229         if (!timer_list->active_timers) {
230             return -1;
231         }
232         expire_time = timer_list->active_timers->expire_time;
233     }
234 
235     delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
236 
237     if (delta <= 0) {
238         return 0;
239     }
240 
241     return delta;
242 }
243 
244 /* Calculate the soonest deadline across all timerlists attached
245  * to the clock. This is used for the icount timeout so we
246  * ignore whether or not the clock should be used in deadline
247  * calculations.
248  */
249 int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
250 {
251     int64_t deadline = -1;
252     int64_t delta;
253     int64_t expire_time;
254     QEMUTimer *ts;
255     QEMUTimerList *timer_list;
256     QEMUClock *clock = qemu_clock_ptr(type);
257 
258     if (!clock->enabled) {
259         return -1;
260     }
261 
262     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
263         qemu_mutex_lock(&timer_list->active_timers_lock);
264         ts = timer_list->active_timers;
265         /* Skip all external timers */
266         while (ts && (ts->attributes & ~attr_mask)) {
267             ts = ts->next;
268         }
269         if (!ts) {
270             qemu_mutex_unlock(&timer_list->active_timers_lock);
271             continue;
272         }
273         expire_time = ts->expire_time;
274         qemu_mutex_unlock(&timer_list->active_timers_lock);
275 
276         delta = expire_time - qemu_clock_get_ns(type);
277         if (delta <= 0) {
278             delta = 0;
279         }
280         deadline = qemu_soonest_timeout(deadline, delta);
281     }
282     return deadline;
283 }
284 
285 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
286 {
287     return timer_list->clock->type;
288 }
289 
290 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
291 {
292     return main_loop_tlg.tl[type];
293 }
294 
295 void timerlist_notify(QEMUTimerList *timer_list)
296 {
297     if (timer_list->notify_cb) {
298         timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
299     } else {
300         qemu_notify_event();
301     }
302 }
303 
304 /* Transition function to convert a nanosecond timeout to ms
305  * This is used where a system does not support ppoll
306  */
307 int qemu_timeout_ns_to_ms(int64_t ns)
308 {
309     int64_t ms;
310     if (ns < 0) {
311         return -1;
312     }
313 
314     if (!ns) {
315         return 0;
316     }
317 
318     /* Always round up, because it's better to wait too long than to wait too
319      * little and effectively busy-wait
320      */
321     ms = DIV_ROUND_UP(ns, SCALE_MS);
322 
323     /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
324     return MIN(ms, INT32_MAX);
325 }
326 
327 
328 /* qemu implementation of g_poll which uses a nanosecond timeout but is
329  * otherwise identical to g_poll
330  */
331 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
332 {
333 #ifdef CONFIG_PPOLL
334     if (timeout < 0) {
335         return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
336     } else {
337         struct timespec ts;
338         int64_t tvsec = timeout / 1000000000LL;
339         /* Avoid possibly overflowing and specifying a negative number of
340          * seconds, which would turn a very long timeout into a busy-wait.
341          */
342         if (tvsec > (int64_t)INT32_MAX) {
343             tvsec = INT32_MAX;
344         }
345         ts.tv_sec = tvsec;
346         ts.tv_nsec = timeout % 1000000000LL;
347         return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
348     }
349 #else
350     return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
351 #endif
352 }
353 
354 
355 void timer_init_full(QEMUTimer *ts,
356                      QEMUTimerListGroup *timer_list_group, QEMUClockType type,
357                      int scale, int attributes,
358                      QEMUTimerCB *cb, void *opaque)
359 {
360     if (!timer_list_group) {
361         timer_list_group = &main_loop_tlg;
362     }
363     ts->timer_list = timer_list_group->tl[type];
364     ts->cb = cb;
365     ts->opaque = opaque;
366     ts->scale = scale;
367     ts->attributes = attributes;
368     ts->expire_time = -1;
369 }
370 
371 void timer_deinit(QEMUTimer *ts)
372 {
373     assert(ts->expire_time == -1);
374     ts->timer_list = NULL;
375 }
376 
377 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
378 {
379     QEMUTimer **pt, *t;
380 
381     ts->expire_time = -1;
382     pt = &timer_list->active_timers;
383     for(;;) {
384         t = *pt;
385         if (!t)
386             break;
387         if (t == ts) {
388             atomic_set(pt, t->next);
389             break;
390         }
391         pt = &t->next;
392     }
393 }
394 
395 static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
396                                 QEMUTimer *ts, int64_t expire_time)
397 {
398     QEMUTimer **pt, *t;
399 
400     /* add the timer in the sorted list */
401     pt = &timer_list->active_timers;
402     for (;;) {
403         t = *pt;
404         if (!timer_expired_ns(t, expire_time)) {
405             break;
406         }
407         pt = &t->next;
408     }
409     ts->expire_time = MAX(expire_time, 0);
410     ts->next = *pt;
411     atomic_set(pt, ts);
412 
413     return pt == &timer_list->active_timers;
414 }
415 
416 static void timerlist_rearm(QEMUTimerList *timer_list)
417 {
418     /* Interrupt execution to force deadline recalculation.  */
419     if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
420         qemu_start_warp_timer();
421     }
422     timerlist_notify(timer_list);
423 }
424 
425 /* stop a timer, but do not dealloc it */
426 void timer_del(QEMUTimer *ts)
427 {
428     QEMUTimerList *timer_list = ts->timer_list;
429 
430     if (timer_list) {
431         qemu_mutex_lock(&timer_list->active_timers_lock);
432         timer_del_locked(timer_list, ts);
433         qemu_mutex_unlock(&timer_list->active_timers_lock);
434     }
435 }
436 
437 /* modify the current timer so that it will be fired when current_time
438    >= expire_time. The corresponding callback will be called. */
439 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
440 {
441     QEMUTimerList *timer_list = ts->timer_list;
442     bool rearm;
443 
444     qemu_mutex_lock(&timer_list->active_timers_lock);
445     timer_del_locked(timer_list, ts);
446     rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
447     qemu_mutex_unlock(&timer_list->active_timers_lock);
448 
449     if (rearm) {
450         timerlist_rearm(timer_list);
451     }
452 }
453 
454 /* modify the current timer so that it will be fired when current_time
455    >= expire_time or the current deadline, whichever comes earlier.
456    The corresponding callback will be called. */
457 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
458 {
459     QEMUTimerList *timer_list = ts->timer_list;
460     bool rearm;
461 
462     WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
463         if (ts->expire_time == -1 || ts->expire_time > expire_time) {
464             if (ts->expire_time != -1) {
465                 timer_del_locked(timer_list, ts);
466             }
467             rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
468         } else {
469             rearm = false;
470         }
471     }
472     if (rearm) {
473         timerlist_rearm(timer_list);
474     }
475 }
476 
477 void timer_mod(QEMUTimer *ts, int64_t expire_time)
478 {
479     timer_mod_ns(ts, expire_time * ts->scale);
480 }
481 
482 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
483 {
484     timer_mod_anticipate_ns(ts, expire_time * ts->scale);
485 }
486 
487 bool timer_pending(QEMUTimer *ts)
488 {
489     return ts->expire_time >= 0;
490 }
491 
492 bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
493 {
494     return timer_expired_ns(timer_head, current_time * timer_head->scale);
495 }
496 
497 bool timerlist_run_timers(QEMUTimerList *timer_list)
498 {
499     QEMUTimer *ts;
500     int64_t current_time;
501     bool progress = false;
502     QEMUTimerCB *cb;
503     void *opaque;
504     bool need_replay_checkpoint = false;
505 
506     if (!atomic_read(&timer_list->active_timers)) {
507         return false;
508     }
509 
510     qemu_event_reset(&timer_list->timers_done_ev);
511     if (!timer_list->clock->enabled) {
512         goto out;
513     }
514 
515     switch (timer_list->clock->type) {
516     case QEMU_CLOCK_REALTIME:
517         break;
518     default:
519     case QEMU_CLOCK_VIRTUAL:
520         if (replay_mode != REPLAY_MODE_NONE) {
521             /* Checkpoint for virtual clock is redundant in cases where
522              * it's being triggered with only non-EXTERNAL timers, because
523              * these timers don't change guest state directly.
524              * Since it has conditional dependence on specific timers, it is
525              * subject to race conditions and requires special handling.
526              * See below.
527              */
528             need_replay_checkpoint = true;
529         }
530         break;
531     case QEMU_CLOCK_HOST:
532         if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
533             goto out;
534         }
535         break;
536     case QEMU_CLOCK_VIRTUAL_RT:
537         if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
538             goto out;
539         }
540         break;
541     }
542 
543     /*
544      * Extract expired timers from active timers list and and process them.
545      *
546      * In rr mode we need "filtered" checkpointing for virtual clock.  The
547      * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer,
548      * and that must only be done once since the clock value stays the same. Because
549      * non-EXTERNAL timers may appear in the timers list while it being processed,
550      * the checkpoint can be issued at a time until no timers are left and we are
551      * done".
552      */
553     current_time = qemu_clock_get_ns(timer_list->clock->type);
554     qemu_mutex_lock(&timer_list->active_timers_lock);
555     while ((ts = timer_list->active_timers)) {
556         if (!timer_expired_ns(ts, current_time)) {
557             /* No expired timers left.  The checkpoint can be skipped
558              * if no timers fired or they were all external.
559              */
560             break;
561         }
562         if (need_replay_checkpoint
563                 && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
564             /* once we got here, checkpoint clock only once */
565             need_replay_checkpoint = false;
566             qemu_mutex_unlock(&timer_list->active_timers_lock);
567             if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
568                 goto out;
569             }
570             qemu_mutex_lock(&timer_list->active_timers_lock);
571             /* The lock was released; start over again in case the list was
572              * modified.
573              */
574             continue;
575         }
576 
577         /* remove timer from the list before calling the callback */
578         timer_list->active_timers = ts->next;
579         ts->next = NULL;
580         ts->expire_time = -1;
581         cb = ts->cb;
582         opaque = ts->opaque;
583 
584         /* run the callback (the timer list can be modified) */
585         qemu_mutex_unlock(&timer_list->active_timers_lock);
586         cb(opaque);
587         qemu_mutex_lock(&timer_list->active_timers_lock);
588 
589         progress = true;
590     }
591     qemu_mutex_unlock(&timer_list->active_timers_lock);
592 
593 out:
594     qemu_event_set(&timer_list->timers_done_ev);
595     return progress;
596 }
597 
598 bool qemu_clock_run_timers(QEMUClockType type)
599 {
600     return timerlist_run_timers(main_loop_tlg.tl[type]);
601 }
602 
603 void timerlistgroup_init(QEMUTimerListGroup *tlg,
604                          QEMUTimerListNotifyCB *cb, void *opaque)
605 {
606     QEMUClockType type;
607     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
608         tlg->tl[type] = timerlist_new(type, cb, opaque);
609     }
610 }
611 
612 void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
613 {
614     QEMUClockType type;
615     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
616         timerlist_free(tlg->tl[type]);
617     }
618 }
619 
620 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
621 {
622     QEMUClockType type;
623     bool progress = false;
624     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
625         progress |= timerlist_run_timers(tlg->tl[type]);
626     }
627     return progress;
628 }
629 
630 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
631 {
632     int64_t deadline = -1;
633     QEMUClockType type;
634     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
635         if (qemu_clock_use_for_deadline(type)) {
636             deadline = qemu_soonest_timeout(deadline,
637                                             timerlist_deadline_ns(tlg->tl[type]));
638         }
639     }
640     return deadline;
641 }
642 
643 int64_t qemu_clock_get_ns(QEMUClockType type)
644 {
645     switch (type) {
646     case QEMU_CLOCK_REALTIME:
647         return get_clock();
648     default:
649     case QEMU_CLOCK_VIRTUAL:
650         if (use_icount) {
651             return cpu_get_icount();
652         } else {
653             return cpu_get_clock();
654         }
655     case QEMU_CLOCK_HOST:
656         return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
657     case QEMU_CLOCK_VIRTUAL_RT:
658         return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
659     }
660 }
661 
662 void init_clocks(QEMUTimerListNotifyCB *notify_cb)
663 {
664     QEMUClockType type;
665     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
666         qemu_clock_init(type, notify_cb);
667     }
668 
669 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
670     prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
671 #endif
672 }
673 
674 uint64_t timer_expire_time_ns(QEMUTimer *ts)
675 {
676     return timer_pending(ts) ? ts->expire_time : -1;
677 }
678 
679 bool qemu_clock_run_all_timers(void)
680 {
681     bool progress = false;
682     QEMUClockType type;
683 
684     for (type = 0; type < QEMU_CLOCK_MAX; type++) {
685         if (qemu_clock_use_for_deadline(type)) {
686             progress |= qemu_clock_run_timers(type);
687         }
688     }
689 
690     return progress;
691 }
692