/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"

/*
 * ICOUNT: Instruction Counter
 *
 * This module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1 MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;

static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}

static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}
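
/*
 * Illustrative sketch (not part of the original file): the budget
 * bookkeeping above in miniature, with a hypothetical helper and plain
 * integers standing in for the CPUState fields.  Compiled out so it
 * cannot affect the build.
 */
#if 0
static int64_t sketch_icount_get_executed(int64_t budget, uint16_t decr_low,
                                          int64_t extra)
{
    /* what was budgeted minus what the decrementers still have left */
    return budget - (decr_low + extra);
}
/* e.g. sketch_icount_get_executed(10000, 600, 400) == 9000 insns executed */
#endif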

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
           icount_to_ns(icount);
}

int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}
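
/*
 * Illustrative sketch (not part of the original file): each instruction
 * accounts for 2^shift virtual nanoseconds, so the shift maps directly to
 * an instruction rate: shift = 3 is 8 ns/insn (~125 MIPS, the adaptive
 * mode's initial guess below), and shift = MAX_ICOUNT_SHIFT = 10 is
 * 1024 ns/insn (~1 MIPS, the minimum allowable speed).  The hypothetical
 * helper below is compiled out so it cannot affect the build.
 */
#if 0
static int64_t sketch_insns_per_second(int shift)
{
    /* 10^9 virtual ns per second divided by 2^shift ns per instruction */
    return 1000000000LL >> shift;
}
#endif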

/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle, real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}
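
/*
 * Illustrative sketch (not part of the original file): icount_round() is a
 * ceiling division of a nanosecond count by 2^shift ns per instruction, so
 * the returned instruction budget never undershoots the deadline.
 * The check below is compiled out so it cannot affect the build.
 */
#if 0
/* with shift = 3 (8 ns/insn), a 20 ns deadline needs 3 insns (24 ns >= 20) */
_Static_assert(((20 + (1 << 3) - 1) >> 3) == 3, "icount_round sketch");
#endif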

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead, so be careful
             * not to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * The vCPU is sleeping and the warp can't be started.
             * This is probably a race condition: the notification sent
             * to the vCPU was processed in advance and the vCPU went to
             * sleep.  Therefore we have to wake it up so it can do
             * something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        if (!icount_sleep) {
            warn_report_once("icount sleep disabled and no active timers");
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes
         * to sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let vCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop vCPUs and only advance QEMU_CLOCK_VIRTUAL after
             * some "real" time (related to the time left until the next
             * event) has passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do
             * this.  This keeps the warps from being visible externally;
             * for example, you will not be sending network packets
             * continuously instead of every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
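
/*
 * Illustrative sketch (not part of the original file): suppose all vCPUs go
 * idle with the next QEMU_CLOCK_VIRTUAL deadline 100 ms away.  With
 * icount_sleep enabled, icount_start_warp_timer() records the current
 * VIRTUAL_RT time in vm_clock_warp_start and arms icount_warp_timer 100 ms
 * of real time in the future; when it fires, icount_warp_rt() folds the
 * elapsed real time into qemu_icount_bias, so QEMU_CLOCK_VIRTUAL reaches
 * the deadline without the vCPU executing any instructions.  With
 * icount_sleep disabled, the bias is bumped by the full deadline at once
 * instead.
 */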
402 */ 403 if (!runstate_is_running()) { 404 return; 405 } 406 407 replay_async_events(); 408 409 /* warp clock deterministically in record/replay mode */ 410 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { 411 return; 412 } 413 414 timer_del(timers_state.icount_warp_timer); 415 icount_warp_rt(); 416 } 417 418 bool icount_configure(QemuOpts *opts, Error **errp) 419 { 420 const char *option = qemu_opt_get(opts, "shift"); 421 bool sleep = qemu_opt_get_bool(opts, "sleep", true); 422 bool align = qemu_opt_get_bool(opts, "align", false); 423 long time_shift = -1; 424 425 if (!option) { 426 if (qemu_opt_get(opts, "align") != NULL) { 427 error_setg(errp, "Please specify shift option when using align"); 428 return false; 429 } 430 return true; 431 } 432 433 if (align && !sleep) { 434 error_setg(errp, "align=on and sleep=off are incompatible"); 435 return false; 436 } 437 438 if (strcmp(option, "auto") != 0) { 439 if (qemu_strtol(option, NULL, 0, &time_shift) < 0 440 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { 441 error_setg(errp, "icount: Invalid shift value"); 442 return false; 443 } 444 } else if (icount_align_option) { 445 error_setg(errp, "shift=auto and align=on are incompatible"); 446 return false; 447 } else if (!icount_sleep) { 448 error_setg(errp, "shift=auto and sleep=off are incompatible"); 449 return false; 450 } 451 452 icount_sleep = sleep; 453 if (icount_sleep) { 454 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, 455 icount_timer_cb, NULL); 456 } 457 458 icount_align_option = align; 459 460 if (time_shift >= 0) { 461 timers_state.icount_time_shift = time_shift; 462 icount_enable_precise(); 463 return true; 464 } 465 466 icount_enable_adaptive(); 467 468 /* 469 * 125MIPS seems a reasonable initial guess at the guest speed. 470 * It will be corrected fairly quickly anyway. 471 */ 472 timers_state.icount_time_shift = 3; 473 474 /* 475 * Have both realtime and virtual time triggers for speed adjustment. 476 * The realtime trigger catches emulated time passing too slowly, 477 * the virtual time trigger catches emulated time passing too fast. 478 * Realtime triggers occur even when idle, so use them less frequently 479 * than VM triggers. 480 */ 481 timers_state.vm_clock_warp_start = -1; 482 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, 483 icount_adjust_rt, NULL); 484 timer_mod(timers_state.icount_rt_timer, 485 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 486 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 487 icount_adjust_vm, NULL); 488 timer_mod(timers_state.icount_vm_timer, 489 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 490 NANOSECONDS_PER_SECOND / 10); 491 return true; 492 } 493 494 void icount_notify_exit(void) 495 { 496 assert(icount_enabled()); 497 498 if (current_cpu) { 499 qemu_cpu_kick(current_cpu); 500 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 501 } 502 } 503