/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-timers-internal.h"

/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;

static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}

static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
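 *
 * Unlike icount_update_locked(), this wrapper takes the vm_clock
 * seqlock write section around the update, so concurrent readers of
 * the virtual clock retry and observe a consistent value.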
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
           icount_to_ns(icount);
}

int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}

/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /*
         * The guest is getting too far behind. Speed time up.
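         * Each increment of icount_time_shift doubles the number of
         * virtual nanoseconds accounted per executed instruction (see
         * icount_to_ns()), so virtual time advances faster per insn.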
         */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead, so be careful
             * not to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * vCPU is sleeping and warp can't be started.
             * It is probably a race condition: notification sent
             * to the vCPU was processed in advance and the vCPU went
             * to sleep. Therefore we have to wake it up so it can do
             * something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        if (!icount_sleep) {
            warn_report_once("icount sleep disabled and no active timers");
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes
         * to sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let vCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop vCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This prevents the warps from being visible externally; for
             * example, you will not send network packets continuously
             * instead of every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
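     * (icount_start_warp_timer() performs the same check before it arms
     * the warp timer.)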
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

bool icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
            return false;
        }
        return true;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return false;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return false;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return false;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return false;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return true;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    return true;
}

void icount_notify_exit(void)
{
    assert(icount_enabled());

    if (current_cpu) {
        qemu_cpu_kick(current_cpu);
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}