/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/timer.h"
#include "sysemu/replay.h"
#include "sysemu/cpus.h"

#ifdef CONFIG_POSIX
#include <pthread.h>
#endif

#ifdef CONFIG_PPOLL
#include <poll.h>
#endif

#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
#include <sys/prctl.h>
#endif

/***********************************************************/
/* timers */

typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    NotifierList reset_notifiers;
    int64_t last;

    QEMUClockType type;
    bool enabled;
} QEMUClock;

QEMUTimerListGroup main_loop_tlg;
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];

/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, in order that
 * reenabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    QEMUClock *clock;
    QemuMutex active_timers_lock;
    QEMUTimer *active_timers;
    QLIST_ENTRY(QEMUTimerList) list;
    QEMUTimerListNotifyCB *notify_cb;
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};

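/*
 * Illustrative sketch (my_notify_cb and my_opaque are placeholder names):
 * a thread that wants its own timer lists, rather than the main loop's,
 * creates one list per clock through a QEMUTimerListGroup:
 *
 *     QEMUTimerListGroup tlg;
 *     timerlistgroup_init(&tlg, my_notify_cb, my_opaque);
 *     ...
 *     timerlistgroup_deinit(&tlg);
 *
 * Timers initialised with a NULL group (see timer_init_full() below)
 * attach to main_loop_tlg instead.
 */
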
/**
 * qemu_clock_ptr:
 * @type: type of clock
 *
 * Translate a clock type into a pointer to QEMUClock object.
 *
 * Returns: a pointer to the QEMUClock object
 */
static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
{
    return &qemu_clocks[type];
}

static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
{
    return timer_head && (timer_head->expire_time <= current_time);
}

QEMUTimerList *timerlist_new(QEMUClockType type,
                             QEMUTimerListNotifyCB *cb,
                             void *opaque)
{
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);

    timer_list = g_malloc0(sizeof(QEMUTimerList));
    qemu_event_init(&timer_list->timers_done_ev, true);
    timer_list->clock = clock;
    timer_list->notify_cb = cb;
    timer_list->notify_opaque = opaque;
    qemu_mutex_init(&timer_list->active_timers_lock);
    QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
    return timer_list;
}

void timerlist_free(QEMUTimerList *timer_list)
{
    assert(!timerlist_has_timers(timer_list));
    if (timer_list->clock) {
        QLIST_REMOVE(timer_list, list);
    }
    qemu_mutex_destroy(&timer_list->active_timers_lock);
    g_free(timer_list);
}

static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
{
    QEMUClock *clock = qemu_clock_ptr(type);

    /* Assert that the clock of type TYPE has not been initialized yet. */
    assert(main_loop_tlg.tl[type] == NULL);

    clock->type = type;
    clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
    clock->last = INT64_MIN;
    QLIST_INIT(&clock->timerlists);
    notifier_list_init(&clock->reset_notifiers);
    main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
}

bool qemu_clock_use_for_deadline(QEMUClockType type)
{
    return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
}

void qemu_clock_notify(QEMUClockType type)
{
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);
    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
        timerlist_notify(timer_list);
    }
}

/* Disabling the clock will wait for related timerlists to stop
 * executing timerlist_run_timers.  Thus, this function should not
 * be used from the callback of a timer that is based on @clock.
 * Doing so would cause a deadlock.
 *
 * Caller should hold BQL.
 */
void qemu_clock_enable(QEMUClockType type, bool enabled)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    QEMUTimerList *tl;
    bool old = clock->enabled;
    clock->enabled = enabled;
    if (enabled && !old) {
        qemu_clock_notify(type);
    } else if (!enabled && old) {
        QLIST_FOREACH(tl, &clock->timerlists, list) {
            qemu_event_wait(&tl->timers_done_ev);
        }
    }
}

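/*
 * Illustrative sketch of a typical calling pattern: a BQL-holding VM
 * state change handler pauses the virtual clock while the VM is stopped
 * and re-enables it on resume:
 *
 *     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);  // waits on timers_done_ev
 *     ...
 *     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);   // re-notifies timer lists
 *
 * Per the comment above, this must not be called from the callback of a
 * timer on the same clock, or the wait never completes.
 */
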
bool timerlist_has_timers(QEMUTimerList *timer_list)
{
    return !!atomic_read(&timer_list->active_timers);
}

bool qemu_clock_has_timers(QEMUClockType type)
{
    return timerlist_has_timers(
        main_loop_tlg.tl[type]);
}

bool timerlist_expired(QEMUTimerList *timer_list)
{
    int64_t expire_time;

    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (!timer_list->active_timers) {
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        return false;
    }
    expire_time = timer_list->active_timers->expire_time;
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
}

bool qemu_clock_expired(QEMUClockType type)
{
    return timerlist_expired(
        main_loop_tlg.tl[type]);
}

/*
 * As above, but return -1 for no deadline, and do not cap to 2^32
 * as we know the result is always positive.
 */

int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
{
    int64_t delta;
    int64_t expire_time;

    if (!atomic_read(&timer_list->active_timers)) {
        return -1;
    }

    if (!timer_list->clock->enabled) {
        return -1;
    }

    /* The active timers list may be modified before the caller uses our return
     * value but ->notify_cb() is called when the deadline changes.  Therefore
     * the caller should notice the change and there is no race condition.
     */
    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (!timer_list->active_timers) {
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        return -1;
    }
    expire_time = timer_list->active_timers->expire_time;
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);

    if (delta <= 0) {
        return 0;
    }

    return delta;
}

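/*
 * Illustrative sketch of how a deadline is consumed (fds/nfds are
 * placeholders): an event loop bounds its poll timeout by the nearest
 * timer deadline, e.g.
 *
 *     int64_t timeout_ns = timerlistgroup_deadline_ns(&main_loop_tlg);
 *     qemu_poll_ns(fds, nfds, timeout_ns);
 *
 * A deadline of -1 ("no deadline") lets qemu_poll_ns() block
 * indefinitely; 0 makes it return immediately so expired timers run.
 */
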
/* Calculate the soonest deadline across all timerlists attached
 * to the clock. This is used for the icount timeout so we
 * ignore whether or not the clock should be used in deadline
 * calculations.
 */
int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
{
    int64_t deadline = -1;
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);
    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
        deadline = qemu_soonest_timeout(deadline,
                                        timerlist_deadline_ns(timer_list));
    }
    return deadline;
}

QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
{
    return timer_list->clock->type;
}

QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
{
    return main_loop_tlg.tl[type];
}

void timerlist_notify(QEMUTimerList *timer_list)
{
    if (timer_list->notify_cb) {
        timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
    } else {
        qemu_notify_event();
    }
}

/* Transition function to convert a nanosecond timeout to ms
 * This is used where a system does not support ppoll
 */
int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;
    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait
     */
    ms = DIV_ROUND_UP(ns, SCALE_MS);

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > (int64_t) INT32_MAX) {
        ms = INT32_MAX;
    }

    return (int) ms;
}


/* qemu implementation of g_poll which uses a nanosecond timeout but is
 * otherwise identical to g_poll
 */
int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
{
#ifdef CONFIG_PPOLL
    if (timeout < 0) {
        return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
    } else {
        struct timespec ts;
        int64_t tvsec = timeout / 1000000000LL;
        /* Avoid possibly overflowing and specifying a negative number of
         * seconds, which would turn a very long timeout into a busy-wait.
         */
        if (tvsec > (int64_t)INT32_MAX) {
            tvsec = INT32_MAX;
        }
        ts.tv_sec = tvsec;
        ts.tv_nsec = timeout % 1000000000LL;
        return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
    }
#else
    return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
#endif
}


void timer_init_full(QEMUTimer *ts,
                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
                     int scale, int attributes,
                     QEMUTimerCB *cb, void *opaque)
{
    if (!timer_list_group) {
        timer_list_group = &main_loop_tlg;
    }
    ts->timer_list = timer_list_group->tl[type];
    ts->cb = cb;
    ts->opaque = opaque;
    ts->scale = scale;
    ts->attributes = attributes;
    ts->expire_time = -1;
}

void timer_deinit(QEMUTimer *ts)
{
    assert(ts->expire_time == -1);
    ts->timer_list = NULL;
}

static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
{
    QEMUTimer **pt, *t;

    ts->expire_time = -1;
    pt = &timer_list->active_timers;
    for (;;) {
        t = *pt;
        if (!t) {
            break;
        }
        if (t == ts) {
            atomic_set(pt, t->next);
            break;
        }
        pt = &t->next;
    }
}

static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
                                QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimer **pt, *t;

    /* add the timer in the sorted list */
    pt = &timer_list->active_timers;
    for (;;) {
        t = *pt;
        if (!timer_expired_ns(t, expire_time)) {
            break;
        }
        pt = &t->next;
    }
    ts->expire_time = MAX(expire_time, 0);
    ts->next = *pt;
    atomic_set(pt, ts);

    return pt == &timer_list->active_timers;
}

static void timerlist_rearm(QEMUTimerList *timer_list)
{
    /* Interrupt execution to force deadline recalculation. */
    if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
        qemu_start_warp_timer();
    }
    timerlist_notify(timer_list);
}

/* stop a timer, but do not dealloc it */
void timer_del(QEMUTimer *ts)
{
    QEMUTimerList *timer_list = ts->timer_list;

    if (timer_list) {
        qemu_mutex_lock(&timer_list->active_timers_lock);
        timer_del_locked(timer_list, ts);
        qemu_mutex_unlock(&timer_list->active_timers_lock);
    }
}

/* modify the current timer so that it will be fired when current_time
   >= expire_time. The corresponding callback will be called. */
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimerList *timer_list = ts->timer_list;
    bool rearm;

    qemu_mutex_lock(&timer_list->active_timers_lock);
    timer_del_locked(timer_list, ts);
    rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    if (rearm) {
        timerlist_rearm(timer_list);
    }
}

/* modify the current timer so that it will be fired when current_time
   >= expire_time or the current deadline, whichever comes earlier.
   The corresponding callback will be called. */
void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimerList *timer_list = ts->timer_list;
    bool rearm;

    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (ts->expire_time == -1 || ts->expire_time > expire_time) {
        if (ts->expire_time != -1) {
            timer_del_locked(timer_list, ts);
        }
        rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
    } else {
        rearm = false;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    if (rearm) {
        timerlist_rearm(timer_list);
    }
}

void timer_mod(QEMUTimer *ts, int64_t expire_time)
{
    timer_mod_ns(ts, expire_time * ts->scale);
}

void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
{
    timer_mod_anticipate_ns(ts, expire_time * ts->scale);
}

bool timer_pending(QEMUTimer *ts)
{
    return ts->expire_time >= 0;
}

bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
{
    return timer_expired_ns(timer_head, current_time * timer_head->scale);
}

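/*
 * Illustrative sketch of the init/arm lifecycle (my_cb and my_state are
 * placeholder names): create a timer on the main-loop virtual clock with
 * millisecond scale, then arm it 10 ms from now:
 *
 *     QEMUTimer t;
 *     timer_init_full(&t, NULL, QEMU_CLOCK_VIRTUAL, SCALE_MS, 0,
 *                     my_cb, my_state);
 *     timer_mod(&t, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
 *
 * timer_mod() scales its argument by ts->scale, so it is given in the
 * timer's own units; timer_mod_ns() takes nanoseconds directly.
 */
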
bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;
    bool need_replay_checkpoint = false;

    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (replay_mode != REPLAY_MODE_NONE) {
            /* A checkpoint for the virtual clock is redundant when only
             * EXTERNAL timers fire, because those timers don't change
             * guest state directly.
             * Since the checkpoint thus depends on which timers have
             * expired, it is subject to race conditions and requires
             * special handling.  See below.
             */
            need_replay_checkpoint = true;
        }
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from the active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for the virtual clock.  The
     * checkpoint must be recorded/replayed before processing any non-EXTERNAL
     * timer, and it must only be done once since the clock value stays the
     * same.  Because non-EXTERNAL timers may appear in the list while it is
     * being processed, the checkpoint is deferred until the first expired
     * non-EXTERNAL timer is found, and skipped entirely if no timers fire or
     * all of them are EXTERNAL.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left.  The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        if (need_replay_checkpoint
                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
            /* once we got here, checkpoint clock only once */
            need_replay_checkpoint = false;
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
                goto out;
            }
            qemu_mutex_lock(&timer_list->active_timers_lock);
            /* The lock was released; start over again in case the list was
             * modified.
             */
            continue;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}

bool qemu_clock_run_timers(QEMUClockType type)
{
    return timerlist_run_timers(main_loop_tlg.tl[type]);
}

void timerlistgroup_init(QEMUTimerListGroup *tlg,
                         QEMUTimerListNotifyCB *cb, void *opaque)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        tlg->tl[type] = timerlist_new(type, cb, opaque);
    }
}

void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        timerlist_free(tlg->tl[type]);
    }
}

bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
{
    QEMUClockType type;
    bool progress = false;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        progress |= timerlist_run_timers(tlg->tl[type]);
    }
    return progress;
}

int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
{
    int64_t deadline = -1;
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        if (qemu_clock_use_for_deadline(type)) {
            deadline = qemu_soonest_timeout(deadline,
                                            timerlist_deadline_ns(tlg->tl[type]));
        }
    }
    return deadline;
}

int64_t qemu_clock_get_ns(QEMUClockType type)
{
    int64_t now, last;
    QEMUClock *clock = qemu_clock_ptr(type);

    switch (type) {
    case QEMU_CLOCK_REALTIME:
        return get_clock();
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (use_icount) {
            return cpu_get_icount();
        } else {
            return cpu_get_clock();
        }
    case QEMU_CLOCK_HOST:
        now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
        last = clock->last;
        clock->last = now;
        if (now < last || now > (last + get_max_clock_jump())) {
            notifier_list_notify(&clock->reset_notifiers, &now);
        }
        return now;
    case QEMU_CLOCK_VIRTUAL_RT:
        return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
    }
}

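/*
 * Illustrative sketch of the QEMU_CLOCK_HOST reset-notifier pattern used
 * above (my_clock_reset and my_notifier are placeholder names); the
 * notifier's data argument points at the new int64_t clock value:
 *
 *     static void my_clock_reset(Notifier *n, void *data)
 *     {
 *         int64_t now = *(int64_t *)data;
 *         ...
 *     }
 *
 *     Notifier my_notifier = { .notify = my_clock_reset };
 *     qemu_clock_register_reset_notifier(QEMU_CLOCK_HOST, &my_notifier);
 *
 * The notifier fires when the host clock goes backwards or jumps forward
 * by more than get_max_clock_jump().
 */
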
uint64_t qemu_clock_get_last(QEMUClockType type)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    return clock->last;
}

void qemu_clock_set_last(QEMUClockType type, uint64_t last)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    clock->last = last;
}

void qemu_clock_register_reset_notifier(QEMUClockType type,
                                        Notifier *notifier)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    notifier_list_add(&clock->reset_notifiers, notifier);
}

void qemu_clock_unregister_reset_notifier(QEMUClockType type,
                                          Notifier *notifier)
{
    notifier_remove(notifier);
}

void init_clocks(QEMUTimerListNotifyCB *notify_cb)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        qemu_clock_init(type, notify_cb);
    }

#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
    prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
#endif
}

uint64_t timer_expire_time_ns(QEMUTimer *ts)
{
    return timer_pending(ts) ? ts->expire_time : -1;
}

bool qemu_clock_run_all_timers(void)
{
    bool progress = false;
    QEMUClockType type;

    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        if (qemu_clock_use_for_deadline(type)) {
            progress |= qemu_clock_run_timers(type);
        }
    }

    return progress;
}