/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"

/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific and does not need to be built for the other
 * accelerators.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;

static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}

static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}

/*
 * Seqlock-protected wrapper around icount_update_locked(): fold the
 * instructions executed by @cpu into the global qemu_icount counter
 * so the main loop can see that time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

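/*
 * Return the raw instruction counter, taking into account what the
 * currently running vCPU has executed but not yet folded into
 * timers_state.qemu_icount.  Callers protect this with the vm_clock
 * seqlock.
 */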
static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

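/*
 * Return the current virtual time in nanoseconds: the icount-derived
 * time plus the accumulated qemu_icount_bias.  Caller holds the
 * vm_clock seqlock.
 */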
static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
        icount_to_ns(icount);
}

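/* Return the raw instruction counter value (seqlock-protected reader). */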
int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

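/*
 * Convert an instruction count into nanoseconds: one instruction
 * takes 2^icount_time_shift ns.
 */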
int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}

/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle, real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

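/*
 * Periodically recompute icount_time_shift so that the icount-derived
 * virtual clock stays roughly in step with QEMU_CLOCK_VIRTUAL_RT.
 */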
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

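/* QEMU_CLOCK_VIRTUAL_RT timer callback: re-arm one second ahead and adjust. */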
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

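/* QEMU_CLOCK_VIRTUAL timer callback: re-arm 100 ms ahead and adjust. */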
static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

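/* Convert a nanosecond deadline into an instruction count, rounding up. */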
int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

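/*
 * Fold the real time that elapsed since the warp was started into
 * qemu_icount_bias, so that QEMU_CLOCK_VIRTUAL catches up while the
 * vCPUs are idle.
 */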
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

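/*
 * Called when the vCPU threads go idle (or from the replay layer):
 * either warp QEMU_CLOCK_VIRTUAL straight to the next deadline when
 * sleep is disabled, or arm icount_warp_timer so that virtual time
 * catches up after the corresponding amount of real time has passed.
 */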
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * The vCPU is sleeping and the warp can't be started.
             * This is probably a race condition: the notification sent
             * to the vCPU was processed in advance and the vCPU went to
             * sleep.  Therefore we have to wake it up so it can make
             * progress.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        if (!icount_sleep) {
            warn_report_once("icount sleep disabled and no active timers");
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after
             * some "real" time (related to the time left until the next
             * event) has passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do
             * this.  This avoids making the warps visible externally; for
             * example, you will not be sending network packets continuously
             * instead of every 100 ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

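/*
 * Cancel the warp timer and account for any warp that is still
 * outstanding before the vCPUs start running again.
 */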
void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

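/*
 * Parse the -icount option group: "shift" (a number or "auto"),
 * "sleep" and "align", and arm the icount timers accordingly,
 * e.g. "-icount shift=7,align=on" or "-icount shift=auto".
 * Returns true on success, false (setting @errp) on error.
 */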
bool icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
            return false;
        }
        return true;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return false;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return false;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return false;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return false;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return true;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    return true;
}

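/*
 * Kick the currently running vCPU out of its execution loop and
 * notify QEMU_CLOCK_VIRTUAL so pending deadlines are re-evaluated.
 */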
void icount_notify_exit(void)
{
    assert(icount_enabled());

    if (current_cpu) {
        qemu_cpu_kick(current_cpu);
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}