xref: /openbmc/qemu/accel/tcg/icount-common.c (revision 17b8d8ac)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "sysemu/cpus.h"
31 #include "sysemu/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "sysemu/replay.h"
36 #include "sysemu/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "sysemu/cpu-timers.h"
39 #include "sysemu/cpu-throttle.h"
40 #include "sysemu/cpu-timers-internal.h"
41 
42 /*
43  * ICOUNT: Instruction Counter
44  *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
47  */
/*
 * Value of the icount "sleep" option.  Defaults to true and is
 * overwritten by icount_configure() once the options have been validated.
 */
static bool icount_sleep = true;
/*
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 * This is the largest shift icount_configure() accepts and the largest
 * value icount_adjust() will raise the adaptive shift to.
 */
#define MAX_ICOUNT_SHIFT 10

/*
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;
58 
59 static void icount_enable_precise(void)
60 {
61     use_icount = 1;
62 }
63 
64 static void icount_enable_adaptive(void)
65 {
66     use_icount = 2;
67 }
68 
69 /*
70  * The current number of executed instructions is based on what we
71  * originally budgeted minus the current state of the decrementing
72  * icount counters in extra/u16.low.
73  */
74 static int64_t icount_get_executed(CPUState *cpu)
75 {
76     return (cpu->icount_budget -
77             (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
78 }
79 
80 /*
81  * Update the global shared timer_state.qemu_icount to take into
82  * account executed instructions. This is done by the TCG vCPU
83  * thread so the main-loop can see time has moved forward.
84  */
85 static void icount_update_locked(CPUState *cpu)
86 {
87     int64_t executed = icount_get_executed(cpu);
88     cpu->icount_budget -= executed;
89 
90     qatomic_set_i64(&timers_state.qemu_icount,
91                     timers_state.qemu_icount + executed);
92 }
93 
94 /*
95  * Update the global shared timer_state.qemu_icount to take into
96  * account executed instructions. This is done by the TCG vCPU
97  * thread so the main-loop can see time has moved forward.
98  */
99 void icount_update(CPUState *cpu)
100 {
101     seqlock_write_lock(&timers_state.vm_clock_seqlock,
102                        &timers_state.vm_clock_lock);
103     icount_update_locked(cpu);
104     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
105                          &timers_state.vm_clock_lock);
106 }
107 
108 static int64_t icount_get_raw_locked(void)
109 {
110     CPUState *cpu = current_cpu;
111 
112     if (cpu && cpu->running) {
113         if (!cpu->neg.can_do_io) {
114             error_report("Bad icount read");
115             exit(1);
116         }
117         /* Take into account what has run */
118         icount_update_locked(cpu);
119     }
120     /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
121     return qatomic_read_i64(&timers_state.qemu_icount);
122 }
123 
124 static int64_t icount_get_locked(void)
125 {
126     int64_t icount = icount_get_raw_locked();
127     return qatomic_read_i64(&timers_state.qemu_icount_bias) +
128         icount_to_ns(icount);
129 }
130 
131 int64_t icount_get_raw(void)
132 {
133     int64_t icount;
134     unsigned start;
135 
136     do {
137         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
138         icount = icount_get_raw_locked();
139     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
140 
141     return icount;
142 }
143 
144 /* Return the virtual CPU time, based on the instruction counter.  */
145 int64_t icount_get(void)
146 {
147     int64_t icount;
148     unsigned start;
149 
150     do {
151         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
152         icount = icount_get_locked();
153     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
154 
155     return icount;
156 }
157 
158 int64_t icount_to_ns(int64_t icount)
159 {
160     return icount << qatomic_read(&timers_state.icount_time_shift);
161 }
162 
163 /*
164  * Correlation between real and virtual time is always going to be
165  * fairly approximate, so ignore small variation.
166  * When the guest is idle real and virtual time will be aligned in
167  * the IO wait loop.
168  */
169 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
170 
171 static void icount_adjust(void)
172 {
173     int64_t cur_time;
174     int64_t cur_icount;
175     int64_t delta;
176 
177     /* If the VM is not running, then do nothing.  */
178     if (!runstate_is_running()) {
179         return;
180     }
181 
182     seqlock_write_lock(&timers_state.vm_clock_seqlock,
183                        &timers_state.vm_clock_lock);
184     cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
185                                    cpu_get_clock_locked());
186     cur_icount = icount_get_locked();
187 
188     delta = cur_icount - cur_time;
189     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
190     if (delta > 0
191         && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
192         && timers_state.icount_time_shift > 0) {
193         /* The guest is getting too far ahead.  Slow time down.  */
194         qatomic_set(&timers_state.icount_time_shift,
195                     timers_state.icount_time_shift - 1);
196     }
197     if (delta < 0
198         && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
199         && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
200         /* The guest is getting too far behind.  Speed time up.  */
201         qatomic_set(&timers_state.icount_time_shift,
202                     timers_state.icount_time_shift + 1);
203     }
204     timers_state.last_delta = delta;
205     qatomic_set_i64(&timers_state.qemu_icount_bias,
206                     cur_icount - (timers_state.qemu_icount
207                                   << timers_state.icount_time_shift));
208     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
209                          &timers_state.vm_clock_lock);
210 }
211 
212 static void icount_adjust_rt(void *opaque)
213 {
214     timer_mod(timers_state.icount_rt_timer,
215               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
216     icount_adjust();
217 }
218 
219 static void icount_adjust_vm(void *opaque)
220 {
221     timer_mod(timers_state.icount_vm_timer,
222                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
223                    NANOSECONDS_PER_SECOND / 10);
224     icount_adjust();
225 }
226 
227 int64_t icount_round(int64_t count)
228 {
229     int shift = qatomic_read(&timers_state.icount_time_shift);
230     return (count + (1 << shift) - 1) >> shift;
231 }
232 
233 static void icount_warp_rt(void)
234 {
235     unsigned seq;
236     int64_t warp_start;
237 
238     /*
239      * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
240      * changes from -1 to another value, so the race here is okay.
241      */
242     do {
243         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
244         warp_start = timers_state.vm_clock_warp_start;
245     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
246 
247     if (warp_start == -1) {
248         return;
249     }
250 
251     seqlock_write_lock(&timers_state.vm_clock_seqlock,
252                        &timers_state.vm_clock_lock);
253     if (runstate_is_running()) {
254         int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
255                                             cpu_get_clock_locked());
256         int64_t warp_delta;
257 
258         warp_delta = clock - timers_state.vm_clock_warp_start;
259         if (icount_enabled() == 2) {
260             /*
261              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
262              * ahead of real time (it might already be ahead so careful not
263              * to go backwards).
264              */
265             int64_t cur_icount = icount_get_locked();
266             int64_t delta = clock - cur_icount;
267 
268             if (delta < 0) {
269                 delta = 0;
270             }
271             warp_delta = MIN(warp_delta, delta);
272         }
273         qatomic_set_i64(&timers_state.qemu_icount_bias,
274                         timers_state.qemu_icount_bias + warp_delta);
275     }
276     timers_state.vm_clock_warp_start = -1;
277     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
278                        &timers_state.vm_clock_lock);
279 
280     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
281         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
282     }
283 }
284 
/*
 * Callback of icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no additional checkpoint is needed here.
     */
    icount_warp_rt();
}
293 
294 void icount_start_warp_timer(void)
295 {
296     int64_t clock;
297     int64_t deadline;
298 
299     assert(icount_enabled());
300 
301     /*
302      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
303      * do not fire, so computing the deadline does not make sense.
304      */
305     if (!runstate_is_running()) {
306         return;
307     }
308 
309     if (replay_mode != REPLAY_MODE_PLAY) {
310         if (!all_cpu_threads_idle()) {
311             return;
312         }
313 
314         if (qtest_enabled()) {
315             /* When testing, qtest commands advance icount.  */
316             return;
317         }
318 
319         replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
320     } else {
321         /* warp clock deterministically in record/replay mode */
322         if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
323             /*
324              * vCPU is sleeping and warp can't be started.
325              * It is probably a race condition: notification sent
326              * to vCPU was processed in advance and vCPU went to sleep.
327              * Therefore we have to wake it up for doing something.
328              */
329             if (replay_has_event()) {
330                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
331             }
332             return;
333         }
334     }
335 
336     /* We want to use the earliest deadline from ALL vm_clocks */
337     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
338     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
339                                           ~QEMU_TIMER_ATTR_EXTERNAL);
340     if (deadline < 0) {
341         static bool notified;
342         if (!icount_sleep && !notified) {
343             warn_report("icount sleep disabled and no active timers");
344             notified = true;
345         }
346         return;
347     }
348 
349     if (deadline > 0) {
350         /*
351          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
352          * sleep.  Otherwise, the CPU might be waiting for a future timer
353          * interrupt to wake it up, but the interrupt never comes because
354          * the vCPU isn't running any insns and thus doesn't advance the
355          * QEMU_CLOCK_VIRTUAL.
356          */
357         if (!icount_sleep) {
358             /*
359              * We never let VCPUs sleep in no sleep icount mode.
360              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
361              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
362              * It is useful when we want a deterministic execution time,
363              * isolated from host latencies.
364              */
365             seqlock_write_lock(&timers_state.vm_clock_seqlock,
366                                &timers_state.vm_clock_lock);
367             qatomic_set_i64(&timers_state.qemu_icount_bias,
368                             timers_state.qemu_icount_bias + deadline);
369             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
370                                  &timers_state.vm_clock_lock);
371             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
372         } else {
373             /*
374              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
375              * "real" time, (related to the time left until the next event) has
376              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
377              * This avoids that the warps are visible externally; for example,
378              * you will not be sending network packets continuously instead of
379              * every 100ms.
380              */
381             seqlock_write_lock(&timers_state.vm_clock_seqlock,
382                                &timers_state.vm_clock_lock);
383             if (timers_state.vm_clock_warp_start == -1
384                 || timers_state.vm_clock_warp_start > clock) {
385                 timers_state.vm_clock_warp_start = clock;
386             }
387             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
388                                  &timers_state.vm_clock_lock);
389             timer_mod_anticipate(timers_state.icount_warp_timer,
390                                  clock + deadline);
391         }
392     } else if (deadline == 0) {
393         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
394     }
395 }
396 
397 void icount_account_warp_timer(void)
398 {
399     if (!icount_sleep) {
400         return;
401     }
402 
403     /*
404      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
405      * do not fire, so computing the deadline does not make sense.
406      */
407     if (!runstate_is_running()) {
408         return;
409     }
410 
411     replay_async_events();
412 
413     /* warp clock deterministically in record/replay mode */
414     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
415         return;
416     }
417 
418     timer_del(timers_state.icount_warp_timer);
419     icount_warp_rt();
420 }
421 
422 void icount_configure(QemuOpts *opts, Error **errp)
423 {
424     const char *option = qemu_opt_get(opts, "shift");
425     bool sleep = qemu_opt_get_bool(opts, "sleep", true);
426     bool align = qemu_opt_get_bool(opts, "align", false);
427     long time_shift = -1;
428 
429     if (!option) {
430         if (qemu_opt_get(opts, "align") != NULL) {
431             error_setg(errp, "Please specify shift option when using align");
432         }
433         return;
434     }
435 
436     if (align && !sleep) {
437         error_setg(errp, "align=on and sleep=off are incompatible");
438         return;
439     }
440 
441     if (strcmp(option, "auto") != 0) {
442         if (qemu_strtol(option, NULL, 0, &time_shift) < 0
443             || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
444             error_setg(errp, "icount: Invalid shift value");
445             return;
446         }
447     } else if (icount_align_option) {
448         error_setg(errp, "shift=auto and align=on are incompatible");
449         return;
450     } else if (!icount_sleep) {
451         error_setg(errp, "shift=auto and sleep=off are incompatible");
452         return;
453     }
454 
455     icount_sleep = sleep;
456     if (icount_sleep) {
457         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
458                                          icount_timer_cb, NULL);
459     }
460 
461     icount_align_option = align;
462 
463     if (time_shift >= 0) {
464         timers_state.icount_time_shift = time_shift;
465         icount_enable_precise();
466         return;
467     }
468 
469     icount_enable_adaptive();
470 
471     /*
472      * 125MIPS seems a reasonable initial guess at the guest speed.
473      * It will be corrected fairly quickly anyway.
474      */
475     timers_state.icount_time_shift = 3;
476 
477     /*
478      * Have both realtime and virtual time triggers for speed adjustment.
479      * The realtime trigger catches emulated time passing too slowly,
480      * the virtual time trigger catches emulated time passing too fast.
481      * Realtime triggers occur even when idle, so use them less frequently
482      * than VM triggers.
483      */
484     timers_state.vm_clock_warp_start = -1;
485     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
486                                    icount_adjust_rt, NULL);
487     timer_mod(timers_state.icount_rt_timer,
488                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
489     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
490                                         icount_adjust_vm, NULL);
491     timer_mod(timers_state.icount_vm_timer,
492                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
493                    NANOSECONDS_PER_SECOND / 10);
494 }
495 
496 void icount_notify_exit(void)
497 {
498     if (icount_enabled() && current_cpu) {
499         qemu_cpu_kick(current_cpu);
500         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
501     }
502 }
503