/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/timer.h"
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpus.h"

#ifdef CONFIG_POSIX
#include <pthread.h>
#endif

#ifdef CONFIG_PPOLL
#include <poll.h>
#endif

#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
#include <sys/prctl.h>
#endif

/***********************************************************/
/* timers */

typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    NotifierList reset_notifiers;
    int64_t last;

    QEMUClockType type;
    bool enabled;
} QEMUClock;

QEMUTimerListGroup main_loop_tlg;
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];

/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, in order that
 * reenabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    QEMUClock *clock;
    QemuMutex active_timers_lock;
    QEMUTimer *active_timers;
    QLIST_ENTRY(QEMUTimerList) list;
    QEMUTimerListNotifyCB *notify_cb;
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};

/**
 * qemu_clock_ptr:
 * @type: type of clock
 *
 * Translate a clock type into a pointer to QEMUClock object.
 *
 * Returns: a pointer to the QEMUClock object
 */
static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
{
    return &qemu_clocks[type];
}

static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
{
    return timer_head && (timer_head->expire_time <= current_time);
}

QEMUTimerList *timerlist_new(QEMUClockType type,
                             QEMUTimerListNotifyCB *cb,
                             void *opaque)
{
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);

    timer_list = g_malloc0(sizeof(QEMUTimerList));
    qemu_event_init(&timer_list->timers_done_ev, true);
    timer_list->clock = clock;
    timer_list->notify_cb = cb;
    timer_list->notify_opaque = opaque;
    qemu_mutex_init(&timer_list->active_timers_lock);
    QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
    return timer_list;
}

void timerlist_free(QEMUTimerList *timer_list)
{
    assert(!timerlist_has_timers(timer_list));
    if (timer_list->clock) {
        QLIST_REMOVE(timer_list, list);
    }
    qemu_mutex_destroy(&timer_list->active_timers_lock);
    g_free(timer_list);
}

static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
{
    QEMUClock *clock = qemu_clock_ptr(type);

    /* Assert that the clock of type TYPE has not been initialized yet. */
    assert(main_loop_tlg.tl[type] == NULL);

    clock->type = type;
    clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
    clock->last = INT64_MIN;
    QLIST_INIT(&clock->timerlists);
    notifier_list_init(&clock->reset_notifiers);
    main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
}

bool qemu_clock_use_for_deadline(QEMUClockType type)
{
    return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
}

void qemu_clock_notify(QEMUClockType type)
{
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);
    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
        timerlist_notify(timer_list);
    }
}

/* Disabling the clock will wait for related timerlists to stop
 * executing qemu_run_timers. Thus, this function should not
 * be used from the callback of a timer that is based on @clock.
 * Doing so would cause a deadlock.
 *
 * Caller should hold BQL.
 */
void qemu_clock_enable(QEMUClockType type, bool enabled)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    QEMUTimerList *tl;
    bool old = clock->enabled;
    clock->enabled = enabled;
    if (enabled && !old) {
        qemu_clock_notify(type);
    } else if (!enabled && old) {
        QLIST_FOREACH(tl, &clock->timerlists, list) {
            qemu_event_wait(&tl->timers_done_ev);
        }
    }
}

bool timerlist_has_timers(QEMUTimerList *timer_list)
{
    return !!atomic_read(&timer_list->active_timers);
}

bool qemu_clock_has_timers(QEMUClockType type)
{
    return timerlist_has_timers(main_loop_tlg.tl[type]);
}

bool timerlist_expired(QEMUTimerList *timer_list)
{
    int64_t expire_time;

    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (!timer_list->active_timers) {
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        return false;
    }
    expire_time = timer_list->active_timers->expire_time;
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
}

bool qemu_clock_expired(QEMUClockType type)
{
    return timerlist_expired(main_loop_tlg.tl[type]);
}

/*
 * As above, but return -1 for no deadline, and do not cap to 2^32
 * as we know the result is always positive.
 */

int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
{
    int64_t delta;
    int64_t expire_time;

    if (!atomic_read(&timer_list->active_timers)) {
        return -1;
    }

    if (!timer_list->clock->enabled) {
        return -1;
    }

    /* The active timers list may be modified before the caller uses our return
     * value but ->notify_cb() is called when the deadline changes. Therefore
     * the caller should notice the change and there is no race condition.
     */
    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (!timer_list->active_timers) {
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        return -1;
    }
    expire_time = timer_list->active_timers->expire_time;
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);

    if (delta <= 0) {
        return 0;
    }

    return delta;
}

/* Calculate the soonest deadline across all timerlists attached
 * to the clock. This is used for the icount timeout so we
 * ignore whether or not the clock should be used in deadline
 * calculations.
 */
int64_t qemu_clock_deadline_ns_all(QEMUClockType type)
{
    int64_t deadline = -1;
    QEMUTimerList *timer_list;
    QEMUClock *clock = qemu_clock_ptr(type);
    QLIST_FOREACH(timer_list, &clock->timerlists, list) {
        deadline = qemu_soonest_timeout(deadline,
                                        timerlist_deadline_ns(timer_list));
    }
    return deadline;
}

QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
{
    return timer_list->clock->type;
}

QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
{
    return main_loop_tlg.tl[type];
}

void timerlist_notify(QEMUTimerList *timer_list)
{
    if (timer_list->notify_cb) {
        timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
    } else {
        qemu_notify_event();
    }
}

/* Transition function to convert a nanosecond timeout to ms
 * This is used where a system does not support ppoll
 */
int qemu_timeout_ns_to_ms(int64_t ns)
{
    int64_t ms;
    if (ns < 0) {
        return -1;
    }

    if (!ns) {
        return 0;
    }

    /* Always round up, because it's better to wait too long than to wait too
     * little and effectively busy-wait
     */
    ms = DIV_ROUND_UP(ns, SCALE_MS);

    /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
    if (ms > (int64_t) INT32_MAX) {
        ms = INT32_MAX;
    }

    return (int) ms;
}


/* qemu implementation of g_poll which uses a nanosecond timeout but is
 * otherwise identical to g_poll
 */
int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
{
#ifdef CONFIG_PPOLL
    if (timeout < 0) {
        return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
    } else {
        struct timespec ts;
        int64_t tvsec = timeout / 1000000000LL;
        /* Avoid possibly overflowing and specifying a negative number of
         * seconds, which would turn a very long timeout into a busy-wait.
         */
        if (tvsec > (int64_t)INT32_MAX) {
            tvsec = INT32_MAX;
        }
        ts.tv_sec = tvsec;
        ts.tv_nsec = timeout % 1000000000LL;
        return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
    }
#else
    return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
#endif
}


void timer_init_full(QEMUTimer *ts,
                     QEMUTimerListGroup *timer_list_group, QEMUClockType type,
                     int scale, int attributes,
                     QEMUTimerCB *cb, void *opaque)
{
    if (!timer_list_group) {
        timer_list_group = &main_loop_tlg;
    }
    ts->timer_list = timer_list_group->tl[type];
    ts->cb = cb;
    ts->opaque = opaque;
    ts->scale = scale;
    ts->attributes = attributes;
    ts->expire_time = -1;
}

void timer_deinit(QEMUTimer *ts)
{
    assert(ts->expire_time == -1);
    ts->timer_list = NULL;
}

static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
{
    QEMUTimer **pt, *t;

    ts->expire_time = -1;
    pt = &timer_list->active_timers;
    for (;;) {
        t = *pt;
        if (!t) {
            break;
        }
        if (t == ts) {
            atomic_set(pt, t->next);
            break;
        }
        pt = &t->next;
    }
}

static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
                                QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimer **pt, *t;

    /* add the timer in the sorted list */
    pt = &timer_list->active_timers;
    for (;;) {
        t = *pt;
        if (!timer_expired_ns(t, expire_time)) {
            break;
        }
        pt = &t->next;
    }
    ts->expire_time = MAX(expire_time, 0);
    ts->next = *pt;
    atomic_set(pt, ts);

    return pt == &timer_list->active_timers;
}

static void timerlist_rearm(QEMUTimerList *timer_list)
{
    /* Interrupt execution to force deadline recalculation. */
    if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
        qemu_start_warp_timer();
    }
    timerlist_notify(timer_list);
}

/* stop a timer, but do not dealloc it */
void timer_del(QEMUTimer *ts)
{
    QEMUTimerList *timer_list = ts->timer_list;

    if (timer_list) {
        qemu_mutex_lock(&timer_list->active_timers_lock);
        timer_del_locked(timer_list, ts);
        qemu_mutex_unlock(&timer_list->active_timers_lock);
    }
}

/* modify the current timer so that it will be fired when current_time
   >= expire_time. The corresponding callback will be called. */
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimerList *timer_list = ts->timer_list;
    bool rearm;

    qemu_mutex_lock(&timer_list->active_timers_lock);
    timer_del_locked(timer_list, ts);
    rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    if (rearm) {
        timerlist_rearm(timer_list);
    }
}
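
/*
 * Usage sketch (illustrative only, compiled out): the common periodic-timer
 * pattern built on the functions above.  The callback re-arms its own timer
 * relative to the current clock value.  ExampleState, example_tick and
 * example_start are hypothetical names; timer_mod() and qemu_clock_get_ms()
 * are declared in qemu/timer.h.
 */
#if 0
typedef struct ExampleState {
    QEMUTimer timer;
} ExampleState;

static void example_tick(void *opaque)
{
    ExampleState *s = opaque;

    /* ... periodic work goes here ... */

    /* fire again 10 ms from now on the virtual clock */
    timer_mod(&s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
}

static void example_start(ExampleState *s)
{
    /* a NULL group selects the main loop's timer lists (main_loop_tlg) */
    timer_init_full(&s->timer, NULL, QEMU_CLOCK_VIRTUAL, SCALE_MS, 0,
                    example_tick, s);
    timer_mod(&s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
}
#endif
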
/* modify the current timer so that it will be fired when current_time
   >= expire_time or the current deadline, whichever comes earlier.
   The corresponding callback will be called. */
void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
{
    QEMUTimerList *timer_list = ts->timer_list;
    bool rearm;

    qemu_mutex_lock(&timer_list->active_timers_lock);
    if (ts->expire_time == -1 || ts->expire_time > expire_time) {
        if (ts->expire_time != -1) {
            timer_del_locked(timer_list, ts);
        }
        rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
    } else {
        rearm = false;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

    if (rearm) {
        timerlist_rearm(timer_list);
    }
}

void timer_mod(QEMUTimer *ts, int64_t expire_time)
{
    timer_mod_ns(ts, expire_time * ts->scale);
}

void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
{
    timer_mod_anticipate_ns(ts, expire_time * ts->scale);
}

bool timer_pending(QEMUTimer *ts)
{
    return ts->expire_time >= 0;
}

bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
{
    return timer_expired_ns(timer_head, current_time * timer_head->scale);
}

bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;
    bool need_replay_checkpoint = false;

    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (replay_mode != REPLAY_MODE_NONE) {
            /* Checkpoint for virtual clock is redundant in cases where
             * it's being triggered with only non-EXTERNAL timers, because
             * these timers don't change guest state directly.
             * Since it has conditional dependence on specific timers, it is
             * subject to race conditions and requires special handling.
             * See below.
             */
            need_replay_checkpoint = true;
        }
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for virtual clock. The
     * checkpoint must be recorded/replayed before processing any non-EXTERNAL
     * timer, and that must only be done once since the clock value stays the
     * same. Because non-EXTERNAL timers may appear in the timers list while it
     * is being processed, the checkpoint can be issued at any time until no
     * timers are left and we are done.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left. The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        if (need_replay_checkpoint
                && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) {
            /* once we got here, checkpoint clock only once */
            need_replay_checkpoint = false;
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
                goto out;
            }
            qemu_mutex_lock(&timer_list->active_timers_lock);
            /* The lock was released; start over again in case the list was
             * modified.
             */
            continue;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}

bool qemu_clock_run_timers(QEMUClockType type)
{
    return timerlist_run_timers(main_loop_tlg.tl[type]);
}

void timerlistgroup_init(QEMUTimerListGroup *tlg,
                         QEMUTimerListNotifyCB *cb, void *opaque)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        tlg->tl[type] = timerlist_new(type, cb, opaque);
    }
}

void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        timerlist_free(tlg->tl[type]);
    }
}

bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
{
    QEMUClockType type;
    bool progress = false;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        progress |= timerlist_run_timers(tlg->tl[type]);
    }
    return progress;
}

int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
{
    int64_t deadline = -1;
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        if (qemu_clock_use_for_deadline(type)) {
            deadline = qemu_soonest_timeout(deadline,
                                            timerlist_deadline_ns(tlg->tl[type]));
        }
    }
    return deadline;
}

int64_t qemu_clock_get_ns(QEMUClockType type)
{
    int64_t now, last;
    QEMUClock *clock = qemu_clock_ptr(type);

    switch (type) {
    case QEMU_CLOCK_REALTIME:
        return get_clock();
    default:
    case QEMU_CLOCK_VIRTUAL:
        if (use_icount) {
            return cpu_get_icount();
        } else {
            return cpu_get_clock();
        }
    case QEMU_CLOCK_HOST:
        now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
        last = clock->last;
        clock->last = now;
        if (now < last || now > (last + get_max_clock_jump())) {
            notifier_list_notify(&clock->reset_notifiers, &now);
        }
        return now;
    case QEMU_CLOCK_VIRTUAL_RT:
        return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
    }
}

uint64_t qemu_clock_get_last(QEMUClockType type)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    return clock->last;
}

void qemu_clock_set_last(QEMUClockType type, uint64_t last)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    clock->last = last;
}

void qemu_clock_register_reset_notifier(QEMUClockType type,
                                        Notifier *notifier)
{
    QEMUClock *clock = qemu_clock_ptr(type);
    notifier_list_add(&clock->reset_notifiers, notifier);
}
void qemu_clock_unregister_reset_notifier(QEMUClockType type,
                                          Notifier *notifier)
{
    notifier_remove(notifier);
}

void init_clocks(QEMUTimerListNotifyCB *notify_cb)
{
    QEMUClockType type;
    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        qemu_clock_init(type, notify_cb);
    }

#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
    prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
#endif
}

uint64_t timer_expire_time_ns(QEMUTimer *ts)
{
    return timer_pending(ts) ? ts->expire_time : -1;
}

bool qemu_clock_run_all_timers(void)
{
    bool progress = false;
    QEMUClockType type;

    for (type = 0; type < QEMU_CLOCK_MAX; type++) {
        if (qemu_clock_use_for_deadline(type)) {
            progress |= qemu_clock_run_timers(type);
        }
    }

    return progress;
}
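
/*
 * Usage sketch (illustrative only, compiled out): how an event loop ties the
 * pieces in this file together, in the spirit of the real main loop.  The
 * deadline convention matches qemu_poll_ns(): -1 blocks indefinitely, 0 polls
 * without blocking, and any other value is the maximum wait in nanoseconds.
 * example_main_loop_iteration is a hypothetical name.
 */
#if 0
static void example_main_loop_iteration(GPollFD *fds, guint nfds)
{
    /* soonest deadline across the main loop's per-clock timer lists */
    int64_t timeout_ns = timerlistgroup_deadline_ns(&main_loop_tlg);

    /* sleep until a file descriptor is ready or the deadline passes */
    qemu_poll_ns(fds, nfds, timeout_ns);

    /* run whatever expired while we slept */
    qemu_clock_run_all_timers();
}
#endif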