/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"

/*
 * ICOUNT: Instruction Counter
 *
 * this module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */

/*
 * Whether vCPUs are allowed to sleep while waiting for timer deadlines.
 * Defaults to true; set from the "sleep" suboption of -icount in
 * icount_configure().  When false, QEMU_CLOCK_VIRTUAL is warped forward
 * to the next deadline instead of letting the vCPU idle.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.
 */
#define MAX_ICOUNT_SHIFT 10

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;

static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}

static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 *
 * Caller must hold the vm_clock seqlock write side (see icount_update()
 * for the locked wrapper); the budget is shrunk by the amount consumed
 * so the same instructions are not counted twice.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    /* atomic64 write paired with the atomic read in icount_get_raw_locked() */
    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/*
 * Return the raw instruction counter (not scaled to ns), folding in
 * whatever the currently running vCPU has executed so far.  Aborts if
 * called from a context where icount may not be read (!can_do_io).
 */
static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

/* Virtual time in ns: bias plus the scaled instruction count. */
static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
        icount_to_ns(icount);
}

/*
 * Lock-free reader for the raw instruction count: retry the seqlock
 * read section until no concurrent writer interfered.
 */
int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Scale an instruction count to ns: 1 insn == 2^icount_time_shift ns. */
int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}

/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

/*
 * Adaptive mode: compare virtual time against QEMU_CLOCK_VIRTUAL_RT and
 * nudge icount_time_shift up or down so the two track each other.  Called
 * periodically from both the rt and vm adjustment timers below.
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    /* Recompute the bias so icount_get() is continuous across the shift change */
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Realtime trigger: re-arm a 1s QEMU_CLOCK_VIRTUAL_RT timer, then adjust. */
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

/* Virtual-time trigger: re-arm a 100ms QEMU_CLOCK_VIRTUAL timer, then adjust. */
static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

/*
 * Convert a ns count into instructions at the current shift, rounding up:
 * the inverse of icount_to_ns().
 */
int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

/*
 * Finish a clock warp: add the QEMU_CLOCK_VIRTUAL_RT time elapsed since
 * vm_clock_warp_start to the icount bias, so QEMU_CLOCK_VIRTUAL advances
 * over the interval the vCPUs spent idle, then clear the warp state.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    /* The warp may have pushed QEMU_CLOCK_VIRTUAL past pending deadlines */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

/*
 * Called when all vCPUs go idle: either warp QEMU_CLOCK_VIRTUAL straight
 * to the next timer deadline (sleep=off), or record the warp start and arm
 * icount_warp_timer so icount_warp_rt() accounts the idle time later.
 */
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * vCPU is sleeping and warp can't be started.
             * It is probably a race condition: notification sent
             * to vCPU was processed in advance and vCPU went to sleep.
             * Therefore we have to wake it up for doing something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        /* No active timer: warn once if sleep is disabled, then bail out */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            /* Only move the warp start backwards, never forwards */
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

/*
 * Called when a vCPU wakes up: cancel the pending warp timer and account
 * any warp that was in progress (sleep mode only).
 */
void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

/*
 * Parse the -icount option group (suboptions "shift", "sleep", "align")
 * and set up icount state and timers accordingly.
 *
 * Returns true on success; on bad/conflicting options sets @errp and
 * returns false.  A numeric "shift" selects precise mode with a fixed
 * insn-to-ns scale; "shift=auto" selects adaptive mode with periodic
 * re-tuning via icount_adjust().
 */
bool icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        /* No shift given: icount stays disabled; align alone is an error */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
            return false;
        }
        return true;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return false;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return false;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return false;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return false;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        /* Fixed shift: precise mode, no adjustment timers needed */
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return true;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    return true;
}

/* Kick the current vCPU and re-evaluate QEMU_CLOCK_VIRTUAL timers. */
void icount_notify_exit(void)
{
    assert(icount_enabled());

    if (current_cpu) {
        qemu_cpu_kick(current_cpu);
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}