1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qemu/main-loop.h" 27 #include "qemu/timer.h" 28 #include "qemu/lockable.h" 29 #include "sysemu/replay.h" 30 #include "sysemu/cpus.h" 31 32 #ifdef CONFIG_POSIX 33 #include <pthread.h> 34 #endif 35 36 #ifdef CONFIG_PPOLL 37 #include <poll.h> 38 #endif 39 40 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK 41 #include <sys/prctl.h> 42 #endif 43 44 /***********************************************************/ 45 /* timers */ 46 47 typedef struct QEMUClock { 48 /* We rely on BQL to protect the timerlists */ 49 QLIST_HEAD(, QEMUTimerList) timerlists; 50 51 QEMUClockType type; 52 bool enabled; 53 } QEMUClock; 54 55 QEMUTimerListGroup main_loop_tlg; 56 static QEMUClock qemu_clocks[QEMU_CLOCK_MAX]; 57 58 /* A QEMUTimerList is a list of timers attached to a clock. More 59 * than one QEMUTimerList can be attached to each clock, for instance 60 * used by different AioContexts / threads. Each clock also has 61 * a list of the QEMUTimerLists associated with it, in order that 62 * reenabling the clock can call all the notifiers. 63 */ 64 65 struct QEMUTimerList { 66 QEMUClock *clock; 67 QemuMutex active_timers_lock; 68 QEMUTimer *active_timers; 69 QLIST_ENTRY(QEMUTimerList) list; 70 QEMUTimerListNotifyCB *notify_cb; 71 void *notify_opaque; 72 73 /* lightweight method to mark the end of timerlist's running */ 74 QemuEvent timers_done_ev; 75 }; 76 77 /** 78 * qemu_clock_ptr: 79 * @type: type of clock 80 * 81 * Translate a clock type into a pointer to QEMUClock object. 82 * 83 * Returns: a pointer to the QEMUClock object 84 */ 85 static inline QEMUClock *qemu_clock_ptr(QEMUClockType type) 86 { 87 return &qemu_clocks[type]; 88 } 89 90 static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time) 91 { 92 return timer_head && (timer_head->expire_time <= current_time); 93 } 94 95 QEMUTimerList *timerlist_new(QEMUClockType type, 96 QEMUTimerListNotifyCB *cb, 97 void *opaque) 98 { 99 QEMUTimerList *timer_list; 100 QEMUClock *clock = qemu_clock_ptr(type); 101 102 timer_list = g_malloc0(sizeof(QEMUTimerList)); 103 qemu_event_init(&timer_list->timers_done_ev, true); 104 timer_list->clock = clock; 105 timer_list->notify_cb = cb; 106 timer_list->notify_opaque = opaque; 107 qemu_mutex_init(&timer_list->active_timers_lock); 108 QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list); 109 return timer_list; 110 } 111 112 void timerlist_free(QEMUTimerList *timer_list) 113 { 114 assert(!timerlist_has_timers(timer_list)); 115 if (timer_list->clock) { 116 QLIST_REMOVE(timer_list, list); 117 } 118 qemu_mutex_destroy(&timer_list->active_timers_lock); 119 g_free(timer_list); 120 } 121 122 static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb) 123 { 124 QEMUClock *clock = qemu_clock_ptr(type); 125 126 /* Assert that the clock of type TYPE has not been initialized yet. */ 127 assert(main_loop_tlg.tl[type] == NULL); 128 129 clock->type = type; 130 clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true); 131 QLIST_INIT(&clock->timerlists); 132 main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL); 133 } 134 135 bool qemu_clock_use_for_deadline(QEMUClockType type) 136 { 137 return !(use_icount && (type == QEMU_CLOCK_VIRTUAL)); 138 } 139 140 void qemu_clock_notify(QEMUClockType type) 141 { 142 QEMUTimerList *timer_list; 143 QEMUClock *clock = qemu_clock_ptr(type); 144 QLIST_FOREACH(timer_list, &clock->timerlists, list) { 145 timerlist_notify(timer_list); 146 } 147 } 148 149 /* Disabling the clock will wait for related timerlists to stop 150 * executing qemu_run_timers. Thus, this functions should not 151 * be used from the callback of a timer that is based on @clock. 152 * Doing so would cause a deadlock. 153 * 154 * Caller should hold BQL. 155 */ 156 void qemu_clock_enable(QEMUClockType type, bool enabled) 157 { 158 QEMUClock *clock = qemu_clock_ptr(type); 159 QEMUTimerList *tl; 160 bool old = clock->enabled; 161 clock->enabled = enabled; 162 if (enabled && !old) { 163 qemu_clock_notify(type); 164 } else if (!enabled && old) { 165 QLIST_FOREACH(tl, &clock->timerlists, list) { 166 qemu_event_wait(&tl->timers_done_ev); 167 } 168 } 169 } 170 171 bool timerlist_has_timers(QEMUTimerList *timer_list) 172 { 173 return !!atomic_read(&timer_list->active_timers); 174 } 175 176 bool qemu_clock_has_timers(QEMUClockType type) 177 { 178 return timerlist_has_timers( 179 main_loop_tlg.tl[type]); 180 } 181 182 bool timerlist_expired(QEMUTimerList *timer_list) 183 { 184 int64_t expire_time; 185 186 if (!atomic_read(&timer_list->active_timers)) { 187 return false; 188 } 189 190 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) { 191 if (!timer_list->active_timers) { 192 return false; 193 } 194 expire_time = timer_list->active_timers->expire_time; 195 } 196 197 return expire_time <= qemu_clock_get_ns(timer_list->clock->type); 198 } 199 200 bool qemu_clock_expired(QEMUClockType type) 201 { 202 return timerlist_expired( 203 main_loop_tlg.tl[type]); 204 } 205 206 /* 207 * As above, but return -1 for no deadline, and do not cap to 2^32 208 * as we know the result is always positive. 209 */ 210 211 int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) 212 { 213 int64_t delta; 214 int64_t expire_time; 215 216 if (!atomic_read(&timer_list->active_timers)) { 217 return -1; 218 } 219 220 if (!timer_list->clock->enabled) { 221 return -1; 222 } 223 224 /* The active timers list may be modified before the caller uses our return 225 * value but ->notify_cb() is called when the deadline changes. Therefore 226 * the caller should notice the change and there is no race condition. 227 */ 228 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) { 229 if (!timer_list->active_timers) { 230 return -1; 231 } 232 expire_time = timer_list->active_timers->expire_time; 233 } 234 235 delta = expire_time - qemu_clock_get_ns(timer_list->clock->type); 236 237 if (delta <= 0) { 238 return 0; 239 } 240 241 return delta; 242 } 243 244 /* Calculate the soonest deadline across all timerlists attached 245 * to the clock. This is used for the icount timeout so we 246 * ignore whether or not the clock should be used in deadline 247 * calculations. 248 */ 249 int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) 250 { 251 int64_t deadline = -1; 252 int64_t delta; 253 int64_t expire_time; 254 QEMUTimer *ts; 255 QEMUTimerList *timer_list; 256 QEMUClock *clock = qemu_clock_ptr(type); 257 258 if (!clock->enabled) { 259 return -1; 260 } 261 262 QLIST_FOREACH(timer_list, &clock->timerlists, list) { 263 qemu_mutex_lock(&timer_list->active_timers_lock); 264 ts = timer_list->active_timers; 265 /* Skip all external timers */ 266 while (ts && (ts->attributes & ~attr_mask)) { 267 ts = ts->next; 268 } 269 if (!ts) { 270 qemu_mutex_unlock(&timer_list->active_timers_lock); 271 continue; 272 } 273 expire_time = ts->expire_time; 274 qemu_mutex_unlock(&timer_list->active_timers_lock); 275 276 delta = expire_time - qemu_clock_get_ns(type); 277 if (delta <= 0) { 278 delta = 0; 279 } 280 deadline = qemu_soonest_timeout(deadline, delta); 281 } 282 return deadline; 283 } 284 285 QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list) 286 { 287 return timer_list->clock->type; 288 } 289 290 QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) 291 { 292 return main_loop_tlg.tl[type]; 293 } 294 295 void timerlist_notify(QEMUTimerList *timer_list) 296 { 297 if (timer_list->notify_cb) { 298 timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type); 299 } else { 300 qemu_notify_event(); 301 } 302 } 303 304 /* Transition function to convert a nanosecond timeout to ms 305 * This is used where a system does not support ppoll 306 */ 307 int qemu_timeout_ns_to_ms(int64_t ns) 308 { 309 int64_t ms; 310 if (ns < 0) { 311 return -1; 312 } 313 314 if (!ns) { 315 return 0; 316 } 317 318 /* Always round up, because it's better to wait too long than to wait too 319 * little and effectively busy-wait 320 */ 321 ms = DIV_ROUND_UP(ns, SCALE_MS); 322 323 /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */ 324 return MIN(ms, INT32_MAX); 325 } 326 327 328 /* qemu implementation of g_poll which uses a nanosecond timeout but is 329 * otherwise identical to g_poll 330 */ 331 int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout) 332 { 333 #ifdef CONFIG_PPOLL 334 if (timeout < 0) { 335 return ppoll((struct pollfd *)fds, nfds, NULL, NULL); 336 } else { 337 struct timespec ts; 338 int64_t tvsec = timeout / 1000000000LL; 339 /* Avoid possibly overflowing and specifying a negative number of 340 * seconds, which would turn a very long timeout into a busy-wait. 341 */ 342 if (tvsec > (int64_t)INT32_MAX) { 343 tvsec = INT32_MAX; 344 } 345 ts.tv_sec = tvsec; 346 ts.tv_nsec = timeout % 1000000000LL; 347 return ppoll((struct pollfd *)fds, nfds, &ts, NULL); 348 } 349 #else 350 return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout)); 351 #endif 352 } 353 354 355 void timer_init_full(QEMUTimer *ts, 356 QEMUTimerListGroup *timer_list_group, QEMUClockType type, 357 int scale, int attributes, 358 QEMUTimerCB *cb, void *opaque) 359 { 360 if (!timer_list_group) { 361 timer_list_group = &main_loop_tlg; 362 } 363 ts->timer_list = timer_list_group->tl[type]; 364 ts->cb = cb; 365 ts->opaque = opaque; 366 ts->scale = scale; 367 ts->attributes = attributes; 368 ts->expire_time = -1; 369 } 370 371 void timer_deinit(QEMUTimer *ts) 372 { 373 assert(ts->expire_time == -1); 374 ts->timer_list = NULL; 375 } 376 377 static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) 378 { 379 QEMUTimer **pt, *t; 380 381 ts->expire_time = -1; 382 pt = &timer_list->active_timers; 383 for(;;) { 384 t = *pt; 385 if (!t) 386 break; 387 if (t == ts) { 388 atomic_set(pt, t->next); 389 break; 390 } 391 pt = &t->next; 392 } 393 } 394 395 static bool timer_mod_ns_locked(QEMUTimerList *timer_list, 396 QEMUTimer *ts, int64_t expire_time) 397 { 398 QEMUTimer **pt, *t; 399 400 /* add the timer in the sorted list */ 401 pt = &timer_list->active_timers; 402 for (;;) { 403 t = *pt; 404 if (!timer_expired_ns(t, expire_time)) { 405 break; 406 } 407 pt = &t->next; 408 } 409 ts->expire_time = MAX(expire_time, 0); 410 ts->next = *pt; 411 atomic_set(pt, ts); 412 413 return pt == &timer_list->active_timers; 414 } 415 416 static void timerlist_rearm(QEMUTimerList *timer_list) 417 { 418 /* Interrupt execution to force deadline recalculation. */ 419 if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) { 420 qemu_start_warp_timer(); 421 } 422 timerlist_notify(timer_list); 423 } 424 425 /* stop a timer, but do not dealloc it */ 426 void timer_del(QEMUTimer *ts) 427 { 428 QEMUTimerList *timer_list = ts->timer_list; 429 430 if (timer_list) { 431 qemu_mutex_lock(&timer_list->active_timers_lock); 432 timer_del_locked(timer_list, ts); 433 qemu_mutex_unlock(&timer_list->active_timers_lock); 434 } 435 } 436 437 /* modify the current timer so that it will be fired when current_time 438 >= expire_time. The corresponding callback will be called. */ 439 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time) 440 { 441 QEMUTimerList *timer_list = ts->timer_list; 442 bool rearm; 443 444 qemu_mutex_lock(&timer_list->active_timers_lock); 445 timer_del_locked(timer_list, ts); 446 rearm = timer_mod_ns_locked(timer_list, ts, expire_time); 447 qemu_mutex_unlock(&timer_list->active_timers_lock); 448 449 if (rearm) { 450 timerlist_rearm(timer_list); 451 } 452 } 453 454 /* modify the current timer so that it will be fired when current_time 455 >= expire_time or the current deadline, whichever comes earlier. 456 The corresponding callback will be called. */ 457 void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time) 458 { 459 QEMUTimerList *timer_list = ts->timer_list; 460 bool rearm; 461 462 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) { 463 if (ts->expire_time == -1 || ts->expire_time > expire_time) { 464 if (ts->expire_time != -1) { 465 timer_del_locked(timer_list, ts); 466 } 467 rearm = timer_mod_ns_locked(timer_list, ts, expire_time); 468 } else { 469 rearm = false; 470 } 471 } 472 if (rearm) { 473 timerlist_rearm(timer_list); 474 } 475 } 476 477 void timer_mod(QEMUTimer *ts, int64_t expire_time) 478 { 479 timer_mod_ns(ts, expire_time * ts->scale); 480 } 481 482 void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time) 483 { 484 timer_mod_anticipate_ns(ts, expire_time * ts->scale); 485 } 486 487 bool timer_pending(QEMUTimer *ts) 488 { 489 return ts->expire_time >= 0; 490 } 491 492 bool timer_expired(QEMUTimer *timer_head, int64_t current_time) 493 { 494 return timer_expired_ns(timer_head, current_time * timer_head->scale); 495 } 496 497 bool timerlist_run_timers(QEMUTimerList *timer_list) 498 { 499 QEMUTimer *ts; 500 int64_t current_time; 501 bool progress = false; 502 QEMUTimerCB *cb; 503 void *opaque; 504 bool need_replay_checkpoint = false; 505 506 if (!atomic_read(&timer_list->active_timers)) { 507 return false; 508 } 509 510 qemu_event_reset(&timer_list->timers_done_ev); 511 if (!timer_list->clock->enabled) { 512 goto out; 513 } 514 515 switch (timer_list->clock->type) { 516 case QEMU_CLOCK_REALTIME: 517 break; 518 default: 519 case QEMU_CLOCK_VIRTUAL: 520 if (replay_mode != REPLAY_MODE_NONE) { 521 /* Checkpoint for virtual clock is redundant in cases where 522 * it's being triggered with only non-EXTERNAL timers, because 523 * these timers don't change guest state directly. 524 * Since it has conditional dependence on specific timers, it is 525 * subject to race conditions and requires special handling. 526 * See below. 527 */ 528 need_replay_checkpoint = true; 529 } 530 break; 531 case QEMU_CLOCK_HOST: 532 if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) { 533 goto out; 534 } 535 break; 536 case QEMU_CLOCK_VIRTUAL_RT: 537 if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) { 538 goto out; 539 } 540 break; 541 } 542 543 /* 544 * Extract expired timers from active timers list and and process them. 545 * 546 * In rr mode we need "filtered" checkpointing for virtual clock. The 547 * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer, 548 * and that must only be done once since the clock value stays the same. Because 549 * non-EXTERNAL timers may appear in the timers list while it being processed, 550 * the checkpoint can be issued at a time until no timers are left and we are 551 * done". 552 */ 553 current_time = qemu_clock_get_ns(timer_list->clock->type); 554 qemu_mutex_lock(&timer_list->active_timers_lock); 555 while ((ts = timer_list->active_timers)) { 556 if (!timer_expired_ns(ts, current_time)) { 557 /* No expired timers left. The checkpoint can be skipped 558 * if no timers fired or they were all external. 559 */ 560 break; 561 } 562 if (need_replay_checkpoint 563 && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) { 564 /* once we got here, checkpoint clock only once */ 565 need_replay_checkpoint = false; 566 qemu_mutex_unlock(&timer_list->active_timers_lock); 567 if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { 568 goto out; 569 } 570 qemu_mutex_lock(&timer_list->active_timers_lock); 571 /* The lock was released; start over again in case the list was 572 * modified. 573 */ 574 continue; 575 } 576 577 /* remove timer from the list before calling the callback */ 578 timer_list->active_timers = ts->next; 579 ts->next = NULL; 580 ts->expire_time = -1; 581 cb = ts->cb; 582 opaque = ts->opaque; 583 584 /* run the callback (the timer list can be modified) */ 585 qemu_mutex_unlock(&timer_list->active_timers_lock); 586 cb(opaque); 587 qemu_mutex_lock(&timer_list->active_timers_lock); 588 589 progress = true; 590 } 591 qemu_mutex_unlock(&timer_list->active_timers_lock); 592 593 out: 594 qemu_event_set(&timer_list->timers_done_ev); 595 return progress; 596 } 597 598 bool qemu_clock_run_timers(QEMUClockType type) 599 { 600 return timerlist_run_timers(main_loop_tlg.tl[type]); 601 } 602 603 void timerlistgroup_init(QEMUTimerListGroup *tlg, 604 QEMUTimerListNotifyCB *cb, void *opaque) 605 { 606 QEMUClockType type; 607 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 608 tlg->tl[type] = timerlist_new(type, cb, opaque); 609 } 610 } 611 612 void timerlistgroup_deinit(QEMUTimerListGroup *tlg) 613 { 614 QEMUClockType type; 615 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 616 timerlist_free(tlg->tl[type]); 617 } 618 } 619 620 bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg) 621 { 622 QEMUClockType type; 623 bool progress = false; 624 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 625 progress |= timerlist_run_timers(tlg->tl[type]); 626 } 627 return progress; 628 } 629 630 int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg) 631 { 632 int64_t deadline = -1; 633 QEMUClockType type; 634 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 635 if (qemu_clock_use_for_deadline(type)) { 636 deadline = qemu_soonest_timeout(deadline, 637 timerlist_deadline_ns(tlg->tl[type])); 638 } 639 } 640 return deadline; 641 } 642 643 int64_t qemu_clock_get_ns(QEMUClockType type) 644 { 645 switch (type) { 646 case QEMU_CLOCK_REALTIME: 647 return get_clock(); 648 default: 649 case QEMU_CLOCK_VIRTUAL: 650 if (use_icount) { 651 return cpu_get_icount(); 652 } else { 653 return cpu_get_clock(); 654 } 655 case QEMU_CLOCK_HOST: 656 return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime()); 657 case QEMU_CLOCK_VIRTUAL_RT: 658 return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock()); 659 } 660 } 661 662 void init_clocks(QEMUTimerListNotifyCB *notify_cb) 663 { 664 QEMUClockType type; 665 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 666 qemu_clock_init(type, notify_cb); 667 } 668 669 #ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK 670 prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); 671 #endif 672 } 673 674 uint64_t timer_expire_time_ns(QEMUTimer *ts) 675 { 676 return timer_pending(ts) ? ts->expire_time : -1; 677 } 678 679 bool qemu_clock_run_all_timers(void) 680 { 681 bool progress = false; 682 QEMUClockType type; 683 684 for (type = 0; type < QEMU_CLOCK_MAX; type++) { 685 if (qemu_clock_use_for_deadline(type)) { 686 progress |= qemu_clock_run_timers(type); 687 } 688 } 689 690 return progress; 691 } 692