/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "softmmu/timers-state.h"

/*
 * ICOUNT: Instruction Counter
 *
 * this module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/*
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;

static void icount_enable_precise(void)
{
    use_icount = 1;
}

static void icount_enable_adaptive(void)
{
    use_icount = 2;
}

/*
 * The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void icount_update_locked(CPUState *cpu)
{
    int64_t executed = icount_get_executed(cpu);
    cpu->icount_budget -= executed;

    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
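
/*
 * Reading the counter: the icount_get_raw*() helpers below return the raw
 * instruction count (timers_state.qemu_icount), while icount_get*() scale
 * it to nanoseconds and add timers_state.qemu_icount_bias, yielding the
 * icount-based virtual clock. Lock-free callers use the seqlock
 * read-retry variants further down.
 */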
static int64_t icount_get_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        icount_update_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

static int64_t icount_get_locked(void)
{
    int64_t icount = icount_get_raw_locked();
    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
        icount_to_ns(icount);
}

int64_t icount_get_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t icount_get(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = icount_get_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
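
/*
 * icount_time_shift is the log2 of the number of virtual nanoseconds
 * charged per executed instruction. For example, the adaptive mode's
 * starting point of shift = 3 charges 2^3 = 8 ns per instruction,
 * i.e. roughly 125 MIPS, while MAX_ICOUNT_SHIFT = 10 (1024 ns per
 * instruction) corresponds to the ~1 MIPS floor noted above.
 */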
int64_t icount_to_ns(int64_t icount)
{
    return icount << qatomic_read(&timers_state.icount_time_shift);
}

/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}
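
/*
 * Clock warping: while all vCPUs are idle no instructions are executed,
 * so the icount-based QEMU_CLOCK_VIRTUAL would stop advancing and the
 * timer that is supposed to wake the guest would never fire.
 * icount_warp_rt() below accounts for the real time that has passed
 * since vm_clock_warp_start by adding it to qemu_icount_bias, and
 * icount_start_warp_timer() arranges for this to happen no later than
 * the next QEMU_CLOCK_VIRTUAL deadline.
 */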
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * The vCPU is sleeping and the warp can't be started.
             * It is probably a race condition: the notification sent
             * to the vCPU was processed in advance and the vCPU went
             * to sleep. Therefore we have to wake it up to do something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let vCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop vCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}
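
/*
 * Parse the icount options, typically given on the command line as
 * "-icount shift=N|auto[,align=on|off][,sleep=on|off]". A numeric shift
 * selects the fixed insn-to-ns conversion described above; "auto"
 * enables the adaptive algorithm, which starts from shift = 3 and lets
 * icount_adjust() tune it at runtime.
 */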
void icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

void icount_notify_exit(void)
{
    if (icount_enabled() && current_cpu) {
        qemu_cpu_kick(current_cpu);
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}