xref: /openbmc/qemu/accel/tcg/icount-common.c (revision cbad45511840077dafb6e1d1bc2e228baabecff5)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "sysemu/cpus.h"
31 #include "sysemu/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "sysemu/replay.h"
36 #include "sysemu/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "sysemu/cpu-timers.h"
39 #include "sysemu/cpu-timers-internal.h"
40 
/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific and does not need to be built for other accelerators.
 */
/* Current "sleep" setting; overwritten by icount_configure(). */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

/*
 * Do not count executed instructions (initial mode; changed by
 * icount_enable_precise() / icount_enable_adaptive()).
 */
ICountMode use_icount = ICOUNT_DISABLED;
53 
icount_enable_precise(void)54 static void icount_enable_precise(void)
55 {
56     /* Fixed conversion of insn to ns via "shift" option */
57     use_icount = ICOUNT_PRECISE;
58 }
59 
icount_enable_adaptive(void)60 static void icount_enable_adaptive(void)
61 {
62     /* Runtime adaptive algorithm to compute shift */
63     use_icount = ICOUNT_ADAPTATIVE;
64 }
65 
66 /*
67  * The current number of executed instructions is based on what we
68  * originally budgeted minus the current state of the decrementing
69  * icount counters in extra/u16.low.
70  */
icount_get_executed(CPUState * cpu)71 static int64_t icount_get_executed(CPUState *cpu)
72 {
73     return (cpu->icount_budget -
74             (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
75 }
76 
77 /*
78  * Update the global shared timer_state.qemu_icount to take into
79  * account executed instructions. This is done by the TCG vCPU
80  * thread so the main-loop can see time has moved forward.
81  */
icount_update_locked(CPUState * cpu)82 static void icount_update_locked(CPUState *cpu)
83 {
84     int64_t executed = icount_get_executed(cpu);
85     cpu->icount_budget -= executed;
86 
87     qatomic_set_i64(&timers_state.qemu_icount,
88                     timers_state.qemu_icount + executed);
89 }
90 
91 /*
92  * Update the global shared timer_state.qemu_icount to take into
93  * account executed instructions. This is done by the TCG vCPU
94  * thread so the main-loop can see time has moved forward.
95  */
icount_update(CPUState * cpu)96 void icount_update(CPUState *cpu)
97 {
98     seqlock_write_lock(&timers_state.vm_clock_seqlock,
99                        &timers_state.vm_clock_lock);
100     icount_update_locked(cpu);
101     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
102                          &timers_state.vm_clock_lock);
103 }
104 
icount_get_raw_locked(void)105 static int64_t icount_get_raw_locked(void)
106 {
107     CPUState *cpu = current_cpu;
108 
109     if (cpu && cpu->running) {
110         if (!cpu->neg.can_do_io) {
111             error_report("Bad icount read");
112             exit(1);
113         }
114         /* Take into account what has run */
115         icount_update_locked(cpu);
116     }
117     /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
118     return qatomic_read_i64(&timers_state.qemu_icount);
119 }
120 
icount_get_locked(void)121 static int64_t icount_get_locked(void)
122 {
123     int64_t icount = icount_get_raw_locked();
124     return qatomic_read_i64(&timers_state.qemu_icount_bias) +
125         icount_to_ns(icount);
126 }
127 
icount_get_raw(void)128 int64_t icount_get_raw(void)
129 {
130     int64_t icount;
131     unsigned start;
132 
133     do {
134         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
135         icount = icount_get_raw_locked();
136     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
137 
138     return icount;
139 }
140 
141 /* Return the virtual CPU time, based on the instruction counter.  */
icount_get(void)142 int64_t icount_get(void)
143 {
144     int64_t icount;
145     unsigned start;
146 
147     do {
148         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
149         icount = icount_get_locked();
150     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
151 
152     return icount;
153 }
154 
icount_to_ns(int64_t icount)155 int64_t icount_to_ns(int64_t icount)
156 {
157     return icount << qatomic_read(&timers_state.icount_time_shift);
158 }
159 
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle, real and virtual time will be aligned in
 * the IO wait loop.
 *
 * ICOUNT_WOBBLE is that tolerance: one tenth of a second, in nanoseconds.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
167 
icount_adjust(void)168 static void icount_adjust(void)
169 {
170     int64_t cur_time;
171     int64_t cur_icount;
172     int64_t delta;
173 
174     /* If the VM is not running, then do nothing.  */
175     if (!runstate_is_running()) {
176         return;
177     }
178 
179     seqlock_write_lock(&timers_state.vm_clock_seqlock,
180                        &timers_state.vm_clock_lock);
181     cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
182                                    cpu_get_clock_locked());
183     cur_icount = icount_get_locked();
184 
185     delta = cur_icount - cur_time;
186     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
187     if (delta > 0
188         && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
189         && timers_state.icount_time_shift > 0) {
190         /* The guest is getting too far ahead.  Slow time down.  */
191         qatomic_set(&timers_state.icount_time_shift,
192                     timers_state.icount_time_shift - 1);
193     }
194     if (delta < 0
195         && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
196         && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
197         /* The guest is getting too far behind.  Speed time up.  */
198         qatomic_set(&timers_state.icount_time_shift,
199                     timers_state.icount_time_shift + 1);
200     }
201     timers_state.last_delta = delta;
202     qatomic_set_i64(&timers_state.qemu_icount_bias,
203                     cur_icount - (timers_state.qemu_icount
204                                   << timers_state.icount_time_shift));
205     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
206                          &timers_state.vm_clock_lock);
207 }
208 
icount_adjust_rt(void * opaque)209 static void icount_adjust_rt(void *opaque)
210 {
211     timer_mod(timers_state.icount_rt_timer,
212               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
213     icount_adjust();
214 }
215 
icount_adjust_vm(void * opaque)216 static void icount_adjust_vm(void *opaque)
217 {
218     timer_mod(timers_state.icount_vm_timer,
219                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
220                    NANOSECONDS_PER_SECOND / 10);
221     icount_adjust();
222 }
223 
icount_round(int64_t count)224 int64_t icount_round(int64_t count)
225 {
226     int shift = qatomic_read(&timers_state.icount_time_shift);
227     return (count + (1 << shift) - 1) >> shift;
228 }
229 
icount_warp_rt(void)230 static void icount_warp_rt(void)
231 {
232     unsigned seq;
233     int64_t warp_start;
234 
235     /*
236      * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
237      * changes from -1 to another value, so the race here is okay.
238      */
239     do {
240         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
241         warp_start = timers_state.vm_clock_warp_start;
242     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
243 
244     if (warp_start == -1) {
245         return;
246     }
247 
248     seqlock_write_lock(&timers_state.vm_clock_seqlock,
249                        &timers_state.vm_clock_lock);
250     if (runstate_is_running()) {
251         int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
252                                             cpu_get_clock_locked());
253         int64_t warp_delta;
254 
255         warp_delta = clock - timers_state.vm_clock_warp_start;
256         if (icount_enabled() == ICOUNT_ADAPTATIVE) {
257             /*
258              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
259              * ahead of real time (it might already be ahead so careful not
260              * to go backwards).
261              */
262             int64_t cur_icount = icount_get_locked();
263             int64_t delta = clock - cur_icount;
264 
265             if (delta < 0) {
266                 delta = 0;
267             }
268             warp_delta = MIN(warp_delta, delta);
269         }
270         qatomic_set_i64(&timers_state.qemu_icount_bias,
271                         timers_state.qemu_icount_bias + warp_delta);
272     }
273     timers_state.vm_clock_warp_start = -1;
274     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
275                        &timers_state.vm_clock_lock);
276 
277     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
278         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
279     }
280 }
281 
/*
 * Callback of icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no extra checkpoint is needed here.
     */
    icount_warp_rt();
}
290 
icount_start_warp_timer(void)291 void icount_start_warp_timer(void)
292 {
293     int64_t clock;
294     int64_t deadline;
295 
296     assert(icount_enabled());
297 
298     /*
299      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
300      * do not fire, so computing the deadline does not make sense.
301      */
302     if (!runstate_is_running()) {
303         return;
304     }
305 
306     if (replay_mode != REPLAY_MODE_PLAY) {
307         if (!all_cpu_threads_idle()) {
308             return;
309         }
310 
311         if (qtest_enabled()) {
312             /* When testing, qtest commands advance icount.  */
313             return;
314         }
315 
316         replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
317     } else {
318         /* warp clock deterministically in record/replay mode */
319         if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
320             /*
321              * vCPU is sleeping and warp can't be started.
322              * It is probably a race condition: notification sent
323              * to vCPU was processed in advance and vCPU went to sleep.
324              * Therefore we have to wake it up for doing something.
325              */
326             if (replay_has_event()) {
327                 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
328             }
329             return;
330         }
331     }
332 
333     /* We want to use the earliest deadline from ALL vm_clocks */
334     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
335     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
336                                           ~QEMU_TIMER_ATTR_EXTERNAL);
337     if (deadline < 0) {
338         if (!icount_sleep) {
339             warn_report_once("icount sleep disabled and no active timers");
340         }
341         return;
342     }
343 
344     if (deadline > 0) {
345         /*
346          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
347          * sleep.  Otherwise, the CPU might be waiting for a future timer
348          * interrupt to wake it up, but the interrupt never comes because
349          * the vCPU isn't running any insns and thus doesn't advance the
350          * QEMU_CLOCK_VIRTUAL.
351          */
352         if (!icount_sleep) {
353             /*
354              * We never let VCPUs sleep in no sleep icount mode.
355              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
356              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
357              * It is useful when we want a deterministic execution time,
358              * isolated from host latencies.
359              */
360             seqlock_write_lock(&timers_state.vm_clock_seqlock,
361                                &timers_state.vm_clock_lock);
362             qatomic_set_i64(&timers_state.qemu_icount_bias,
363                             timers_state.qemu_icount_bias + deadline);
364             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
365                                  &timers_state.vm_clock_lock);
366             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
367         } else {
368             /*
369              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
370              * "real" time, (related to the time left until the next event) has
371              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
372              * This avoids that the warps are visible externally; for example,
373              * you will not be sending network packets continuously instead of
374              * every 100ms.
375              */
376             seqlock_write_lock(&timers_state.vm_clock_seqlock,
377                                &timers_state.vm_clock_lock);
378             if (timers_state.vm_clock_warp_start == -1
379                 || timers_state.vm_clock_warp_start > clock) {
380                 timers_state.vm_clock_warp_start = clock;
381             }
382             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
383                                  &timers_state.vm_clock_lock);
384             timer_mod_anticipate(timers_state.icount_warp_timer,
385                                  clock + deadline);
386         }
387     } else if (deadline == 0) {
388         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
389     }
390 }
391 
icount_account_warp_timer(void)392 void icount_account_warp_timer(void)
393 {
394     if (!icount_sleep) {
395         return;
396     }
397 
398     /*
399      * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
400      * do not fire, so computing the deadline does not make sense.
401      */
402     if (!runstate_is_running()) {
403         return;
404     }
405 
406     replay_async_events();
407 
408     /* warp clock deterministically in record/replay mode */
409     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
410         return;
411     }
412 
413     timer_del(timers_state.icount_warp_timer);
414     icount_warp_rt();
415 }
416 
icount_configure(QemuOpts * opts,Error ** errp)417 bool icount_configure(QemuOpts *opts, Error **errp)
418 {
419     const char *option = qemu_opt_get(opts, "shift");
420     bool sleep = qemu_opt_get_bool(opts, "sleep", true);
421     bool align = qemu_opt_get_bool(opts, "align", false);
422     long time_shift = -1;
423 
424     if (!option) {
425         if (qemu_opt_get(opts, "align") != NULL) {
426             error_setg(errp, "Please specify shift option when using align");
427             return false;
428         }
429         return true;
430     }
431 
432     if (align && !sleep) {
433         error_setg(errp, "align=on and sleep=off are incompatible");
434         return false;
435     }
436 
437     if (strcmp(option, "auto") != 0) {
438         if (qemu_strtol(option, NULL, 0, &time_shift) < 0
439             || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
440             error_setg(errp, "icount: Invalid shift value");
441             return false;
442         }
443     } else if (icount_align_option) {
444         error_setg(errp, "shift=auto and align=on are incompatible");
445         return false;
446     } else if (!icount_sleep) {
447         error_setg(errp, "shift=auto and sleep=off are incompatible");
448         return false;
449     }
450 
451     icount_sleep = sleep;
452     if (icount_sleep) {
453         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
454                                          icount_timer_cb, NULL);
455     }
456 
457     icount_align_option = align;
458 
459     if (time_shift >= 0) {
460         timers_state.icount_time_shift = time_shift;
461         icount_enable_precise();
462         return true;
463     }
464 
465     icount_enable_adaptive();
466 
467     /*
468      * 125MIPS seems a reasonable initial guess at the guest speed.
469      * It will be corrected fairly quickly anyway.
470      */
471     timers_state.icount_time_shift = 3;
472 
473     /*
474      * Have both realtime and virtual time triggers for speed adjustment.
475      * The realtime trigger catches emulated time passing too slowly,
476      * the virtual time trigger catches emulated time passing too fast.
477      * Realtime triggers occur even when idle, so use them less frequently
478      * than VM triggers.
479      */
480     timers_state.vm_clock_warp_start = -1;
481     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
482                                    icount_adjust_rt, NULL);
483     timer_mod(timers_state.icount_rt_timer,
484                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
485     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
486                                         icount_adjust_vm, NULL);
487     timer_mod(timers_state.icount_vm_timer,
488                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
489                    NANOSECONDS_PER_SECOND / 10);
490     return true;
491 }
492 
icount_notify_exit(void)493 void icount_notify_exit(void)
494 {
495     assert(icount_enabled());
496 
497     if (current_cpu) {
498         qemu_cpu_kick(current_cpu);
499         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
500     }
501 }
502