/*
 * Wrappers around mutex/cond/thread functions
 *
 * Copyright Red Hat, Inc. 2009
 *
 * Author:
 *  Marcelo Tosatti <mtosatti@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu-thread-common.h"
#include "qemu/tsan.h"
#include "qemu/bitmap.h"

#ifdef CONFIG_PTHREAD_SET_NAME_NP
#include <pthread_np.h>
#endif

/* Whether qemu_thread_create() should try to name new threads (debug aid). */
static bool name_threads;

/*
 * Enable or disable naming of threads created with qemu_thread_create().
 * Naming is best-effort: on hosts with none of the supported pthread
 * naming APIs, enabling only prints a warning and is otherwise a no-op.
 */
void qemu_thread_naming(bool enable)
{
    name_threads = enable;

#if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \
    !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \
    !defined CONFIG_PTHREAD_SET_NAME_NP
    /* This is a debugging option, not fatal */
    if (enable) {
        fprintf(stderr, "qemu: thread naming not supported on this host\n");
    }
#endif
}

/*
 * Report a fatal error from a pthread call and abort.  @err is an
 * errno-style error number as returned by the pthread_* functions;
 * @msg is typically __func__ of the failing wrapper.
 */
static void error_exit(int err, const char *msg)
{
    fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
    abort();
}

/*
 * Clock used for absolute deadlines in timed condvar waits.  Prefer
 * CLOCK_MONOTONIC (immune to wall-clock jumps) when the host supports
 * pthread_condattr_setclock(); otherwise fall back to CLOCK_REALTIME,
 * which is what pthread_cond_timedwait() uses by default.
 */
static inline clockid_t qemu_timedwait_clockid(void)
{
#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
    return CLOCK_MONOTONIC;
#else
    return CLOCK_REALTIME;
#endif
}

/*
 * Fill *@ts with an absolute deadline @ms milliseconds from now, on the
 * same clock that qemu_cond_init() configures for timed waits.  A single
 * carry is sufficient: both the current tv_nsec and the added fraction
 * are each below 1e9, so their sum is below 2e9.
 */
static void compute_abs_deadline(struct timespec *ts, int ms)
{
    clock_gettime(qemu_timedwait_clockid(), ts);
    ts->tv_nsec += (ms % 1000) * 1000000;
    ts->tv_sec += ms / 1000;
    if (ts->tv_nsec >= 1000000000) {
        ts->tv_sec++;
        ts->tv_nsec -= 1000000000;
    }
}

/* Initialize @mutex with default attributes; aborts on failure. */
void qemu_mutex_init(QemuMutex *mutex)
{
    int err;

    err = pthread_mutex_init(&mutex->lock, NULL);
    if (err)
        error_exit(err, __func__);
    /* Marks the mutex initialized (and hooks up contention tracing). */
    qemu_mutex_post_init(mutex);
}

/* Destroy @mutex; it must be initialized and not locked.  Aborts on failure. */
void qemu_mutex_destroy(QemuMutex *mutex)
{
    int err;

    assert(mutex->initialized);
    mutex->initialized = false;
    err = pthread_mutex_destroy(&mutex->lock);
    if (err)
        error_exit(err, __func__);
}
88 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line) 89 { 90 int err; 91 92 assert(mutex->initialized); 93 qemu_mutex_pre_lock(mutex, file, line); 94 err = pthread_mutex_lock(&mutex->lock); 95 if (err) 96 error_exit(err, __func__); 97 qemu_mutex_post_lock(mutex, file, line); 98 } 99 100 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line) 101 { 102 int err; 103 104 assert(mutex->initialized); 105 err = pthread_mutex_trylock(&mutex->lock); 106 if (err == 0) { 107 qemu_mutex_post_lock(mutex, file, line); 108 return 0; 109 } 110 if (err != EBUSY) { 111 error_exit(err, __func__); 112 } 113 return -EBUSY; 114 } 115 116 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line) 117 { 118 int err; 119 120 assert(mutex->initialized); 121 qemu_mutex_pre_unlock(mutex, file, line); 122 err = pthread_mutex_unlock(&mutex->lock); 123 if (err) 124 error_exit(err, __func__); 125 } 126 127 void qemu_rec_mutex_init(QemuRecMutex *mutex) 128 { 129 int err; 130 pthread_mutexattr_t attr; 131 132 pthread_mutexattr_init(&attr); 133 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); 134 err = pthread_mutex_init(&mutex->m.lock, &attr); 135 pthread_mutexattr_destroy(&attr); 136 if (err) { 137 error_exit(err, __func__); 138 } 139 mutex->m.initialized = true; 140 } 141 142 void qemu_rec_mutex_destroy(QemuRecMutex *mutex) 143 { 144 qemu_mutex_destroy(&mutex->m); 145 } 146 147 void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line) 148 { 149 qemu_mutex_lock_impl(&mutex->m, file, line); 150 } 151 152 int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line) 153 { 154 return qemu_mutex_trylock_impl(&mutex->m, file, line); 155 } 156 157 void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line) 158 { 159 qemu_mutex_unlock_impl(&mutex->m, file, line); 160 } 161 162 void qemu_cond_init(QemuCond *cond) 163 { 164 pthread_condattr_t attr; 165 int err; 
166 167 err = pthread_condattr_init(&attr); 168 if (err) { 169 error_exit(err, __func__); 170 } 171 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK 172 err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid()); 173 if (err) { 174 error_exit(err, __func__); 175 } 176 #endif 177 err = pthread_cond_init(&cond->cond, &attr); 178 if (err) { 179 error_exit(err, __func__); 180 } 181 err = pthread_condattr_destroy(&attr); 182 if (err) { 183 error_exit(err, __func__); 184 } 185 cond->initialized = true; 186 } 187 188 void qemu_cond_destroy(QemuCond *cond) 189 { 190 int err; 191 192 assert(cond->initialized); 193 cond->initialized = false; 194 err = pthread_cond_destroy(&cond->cond); 195 if (err) 196 error_exit(err, __func__); 197 } 198 199 void qemu_cond_signal(QemuCond *cond) 200 { 201 int err; 202 203 assert(cond->initialized); 204 err = pthread_cond_signal(&cond->cond); 205 if (err) 206 error_exit(err, __func__); 207 } 208 209 void qemu_cond_broadcast(QemuCond *cond) 210 { 211 int err; 212 213 assert(cond->initialized); 214 err = pthread_cond_broadcast(&cond->cond); 215 if (err) 216 error_exit(err, __func__); 217 } 218 219 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line) 220 { 221 int err; 222 223 assert(cond->initialized); 224 qemu_mutex_pre_unlock(mutex, file, line); 225 err = pthread_cond_wait(&cond->cond, &mutex->lock); 226 qemu_mutex_post_lock(mutex, file, line); 227 if (err) 228 error_exit(err, __func__); 229 } 230 231 static bool TSA_NO_TSA 232 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts, 233 const char *file, const int line) 234 { 235 int err; 236 237 assert(cond->initialized); 238 trace_qemu_mutex_unlock(mutex, file, line); 239 err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts); 240 trace_qemu_mutex_locked(mutex, file, line); 241 if (err && err != ETIMEDOUT) { 242 error_exit(err, __func__); 243 } 244 return err != ETIMEDOUT; 245 } 246 247 bool qemu_cond_timedwait_impl(QemuCond 
*cond, QemuMutex *mutex, int ms, 248 const char *file, const int line) 249 { 250 struct timespec ts; 251 252 compute_abs_deadline(&ts, ms); 253 return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line); 254 } 255 256 void qemu_sem_init(QemuSemaphore *sem, int init) 257 { 258 qemu_mutex_init(&sem->mutex); 259 qemu_cond_init(&sem->cond); 260 261 if (init < 0) { 262 error_exit(EINVAL, __func__); 263 } 264 sem->count = init; 265 } 266 267 void qemu_sem_destroy(QemuSemaphore *sem) 268 { 269 qemu_cond_destroy(&sem->cond); 270 qemu_mutex_destroy(&sem->mutex); 271 } 272 273 void qemu_sem_post(QemuSemaphore *sem) 274 { 275 qemu_mutex_lock(&sem->mutex); 276 if (sem->count == UINT_MAX) { 277 error_exit(EINVAL, __func__); 278 } else { 279 sem->count++; 280 qemu_cond_signal(&sem->cond); 281 } 282 qemu_mutex_unlock(&sem->mutex); 283 } 284 285 int qemu_sem_timedwait(QemuSemaphore *sem, int ms) 286 { 287 bool rc = true; 288 struct timespec ts; 289 290 compute_abs_deadline(&ts, ms); 291 qemu_mutex_lock(&sem->mutex); 292 while (sem->count == 0) { 293 if (ms == 0) { 294 rc = false; 295 } else { 296 rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts, 297 __FILE__, __LINE__); 298 } 299 if (!rc) { /* timeout */ 300 break; 301 } 302 } 303 if (rc) { 304 --sem->count; 305 } 306 qemu_mutex_unlock(&sem->mutex); 307 return (rc ? 
0 : -1); 308 } 309 310 void qemu_sem_wait(QemuSemaphore *sem) 311 { 312 qemu_mutex_lock(&sem->mutex); 313 while (sem->count == 0) { 314 qemu_cond_wait(&sem->cond, &sem->mutex); 315 } 316 --sem->count; 317 qemu_mutex_unlock(&sem->mutex); 318 } 319 320 #ifdef __linux__ 321 #include "qemu/futex.h" 322 #else 323 static inline void qemu_futex_wake(QemuEvent *ev, int n) 324 { 325 assert(ev->initialized); 326 pthread_mutex_lock(&ev->lock); 327 if (n == 1) { 328 pthread_cond_signal(&ev->cond); 329 } else { 330 pthread_cond_broadcast(&ev->cond); 331 } 332 pthread_mutex_unlock(&ev->lock); 333 } 334 335 static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) 336 { 337 assert(ev->initialized); 338 pthread_mutex_lock(&ev->lock); 339 if (ev->value == val) { 340 pthread_cond_wait(&ev->cond, &ev->lock); 341 } 342 pthread_mutex_unlock(&ev->lock); 343 } 344 #endif 345 346 /* Valid transitions: 347 * - free->set, when setting the event 348 * - busy->set, when setting the event, followed by qemu_futex_wake 349 * - set->free, when resetting the event 350 * - free->busy, when waiting 351 * 352 * set->busy does not happen (it can be observed from the outside but 353 * it really is set->free->busy). 354 * 355 * busy->free provably cannot happen; to enforce it, the set->free transition 356 * is done with an OR, which becomes a no-op if the event has concurrently 357 * transitioned to free or busy. 358 */ 359 360 #define EV_SET 0 361 #define EV_FREE 1 362 #define EV_BUSY -1 363 364 void qemu_event_init(QemuEvent *ev, bool init) 365 { 366 #ifndef __linux__ 367 pthread_mutex_init(&ev->lock, NULL); 368 pthread_cond_init(&ev->cond, NULL); 369 #endif 370 371 ev->value = (init ? 
EV_SET : EV_FREE); 372 ev->initialized = true; 373 } 374 375 void qemu_event_destroy(QemuEvent *ev) 376 { 377 assert(ev->initialized); 378 ev->initialized = false; 379 #ifndef __linux__ 380 pthread_mutex_destroy(&ev->lock); 381 pthread_cond_destroy(&ev->cond); 382 #endif 383 } 384 385 void qemu_event_set(QemuEvent *ev) 386 { 387 assert(ev->initialized); 388 389 /* 390 * Pairs with both qemu_event_reset() and qemu_event_wait(). 391 * 392 * qemu_event_set has release semantics, but because it *loads* 393 * ev->value we need a full memory barrier here. 394 */ 395 smp_mb(); 396 if (qatomic_read(&ev->value) != EV_SET) { 397 int old = qatomic_xchg(&ev->value, EV_SET); 398 399 /* Pairs with memory barrier in kernel futex_wait system call. */ 400 smp_mb__after_rmw(); 401 if (old == EV_BUSY) { 402 /* There were waiters, wake them up. */ 403 qemu_futex_wake(ev, INT_MAX); 404 } 405 } 406 } 407 408 void qemu_event_reset(QemuEvent *ev) 409 { 410 assert(ev->initialized); 411 412 /* 413 * If there was a concurrent reset (or even reset+wait), 414 * do nothing. Otherwise change EV_SET->EV_FREE. 415 */ 416 qatomic_or(&ev->value, EV_FREE); 417 418 /* 419 * Order reset before checking the condition in the caller. 420 * Pairs with the first memory barrier in qemu_event_set(). 421 */ 422 smp_mb__after_rmw(); 423 } 424 425 void qemu_event_wait(QemuEvent *ev) 426 { 427 unsigned value; 428 429 assert(ev->initialized); 430 431 /* 432 * qemu_event_wait must synchronize with qemu_event_set even if it does 433 * not go down the slow path, so this load-acquire is needed that 434 * synchronizes with the first memory barrier in qemu_event_set(). 435 * 436 * If we do go down the slow path, there is no requirement at all: we 437 * might miss a qemu_event_set() here but ultimately the memory barrier in 438 * qemu_futex_wait() will ensure the check is done correctly. 
439 */ 440 value = qatomic_load_acquire(&ev->value); 441 if (value != EV_SET) { 442 if (value == EV_FREE) { 443 /* 444 * Leave the event reset and tell qemu_event_set that there are 445 * waiters. No need to retry, because there cannot be a concurrent 446 * busy->free transition. After the CAS, the event will be either 447 * set or busy. 448 * 449 * This cmpxchg doesn't have particular ordering requirements if it 450 * succeeds (moving the store earlier can only cause qemu_event_set() 451 * to issue _more_ wakeups), the failing case needs acquire semantics 452 * like the load above. 453 */ 454 if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { 455 return; 456 } 457 } 458 459 /* 460 * This is the final check for a concurrent set, so it does need 461 * a smp_mb() pairing with the second barrier of qemu_event_set(). 462 * The barrier is inside the FUTEX_WAIT system call. 463 */ 464 qemu_futex_wait(ev, EV_BUSY); 465 } 466 } 467 468 static __thread NotifierList thread_exit; 469 470 /* 471 * Note that in this implementation you can register a thread-exit 472 * notifier for the main thread, but it will never be called. 473 * This is OK because main thread exit can only happen when the 474 * entire process is exiting, and the API allows notifiers to not 475 * be called on process exit. 476 */ 477 void qemu_thread_atexit_add(Notifier *notifier) 478 { 479 notifier_list_add(&thread_exit, notifier); 480 } 481 482 void qemu_thread_atexit_remove(Notifier *notifier) 483 { 484 notifier_remove(notifier); 485 } 486 487 static void qemu_thread_atexit_notify(void *arg) 488 { 489 /* 490 * Called when non-main thread exits (via qemu_thread_exit() 491 * or by returning from its start routine.) 
492 */ 493 notifier_list_notify(&thread_exit, NULL); 494 } 495 496 typedef struct { 497 void *(*start_routine)(void *); 498 void *arg; 499 char *name; 500 } QemuThreadArgs; 501 502 static void *qemu_thread_start(void *args) 503 { 504 QemuThreadArgs *qemu_thread_args = args; 505 void *(*start_routine)(void *) = qemu_thread_args->start_routine; 506 void *arg = qemu_thread_args->arg; 507 void *r; 508 509 /* Attempt to set the threads name; note that this is for debug, so 510 * we're not going to fail if we can't set it. 511 */ 512 if (name_threads && qemu_thread_args->name) { 513 # if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID) 514 pthread_setname_np(pthread_self(), qemu_thread_args->name); 515 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID) 516 pthread_setname_np(qemu_thread_args->name); 517 # elif defined(CONFIG_PTHREAD_SET_NAME_NP) 518 pthread_set_name_np(pthread_self(), qemu_thread_args->name); 519 # endif 520 } 521 QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name); 522 g_free(qemu_thread_args->name); 523 g_free(qemu_thread_args); 524 525 /* 526 * GCC 11 with glibc 2.17 on PowerPC reports 527 * 528 * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes 529 * in a region of size 528 [-Werror=stringop-overflow=] 530 * 540 | pthread_cleanup_push(qemu_thread_atexit_notify, NULL); 531 * | ^~~~~~~~~~~~~~~~~~~~ 532 * 533 * which is clearly nonsense. 
534 */ 535 #pragma GCC diagnostic push 536 #ifndef __clang__ 537 #pragma GCC diagnostic ignored "-Wstringop-overflow" 538 #endif 539 540 pthread_cleanup_push(qemu_thread_atexit_notify, NULL); 541 r = start_routine(arg); 542 pthread_cleanup_pop(1); 543 544 #pragma GCC diagnostic pop 545 546 return r; 547 } 548 549 void qemu_thread_create(QemuThread *thread, const char *name, 550 void *(*start_routine)(void*), 551 void *arg, int mode) 552 { 553 sigset_t set, oldset; 554 int err; 555 pthread_attr_t attr; 556 QemuThreadArgs *qemu_thread_args; 557 558 err = pthread_attr_init(&attr); 559 if (err) { 560 error_exit(err, __func__); 561 } 562 563 if (mode == QEMU_THREAD_DETACHED) { 564 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 565 } 566 567 /* Leave signal handling to the iothread. */ 568 sigfillset(&set); 569 /* Blocking the signals can result in undefined behaviour. */ 570 sigdelset(&set, SIGSEGV); 571 sigdelset(&set, SIGFPE); 572 sigdelset(&set, SIGILL); 573 /* TODO avoid SIGBUS loss on macOS */ 574 pthread_sigmask(SIG_SETMASK, &set, &oldset); 575 576 qemu_thread_args = g_new0(QemuThreadArgs, 1); 577 qemu_thread_args->name = g_strdup(name); 578 qemu_thread_args->start_routine = start_routine; 579 qemu_thread_args->arg = arg; 580 581 err = pthread_create(&thread->thread, &attr, 582 qemu_thread_start, qemu_thread_args); 583 584 if (err) 585 error_exit(err, __func__); 586 587 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 588 589 pthread_attr_destroy(&attr); 590 } 591 592 int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, 593 unsigned long nbits) 594 { 595 #if defined(CONFIG_PTHREAD_AFFINITY_NP) 596 const size_t setsize = CPU_ALLOC_SIZE(nbits); 597 unsigned long value; 598 cpu_set_t *cpuset; 599 int err; 600 601 cpuset = CPU_ALLOC(nbits); 602 g_assert(cpuset); 603 604 CPU_ZERO_S(setsize, cpuset); 605 value = find_first_bit(host_cpus, nbits); 606 while (value < nbits) { 607 CPU_SET_S(value, setsize, cpuset); 608 value = 
find_next_bit(host_cpus, nbits, value + 1); 609 } 610 611 err = pthread_setaffinity_np(thread->thread, setsize, cpuset); 612 CPU_FREE(cpuset); 613 return err; 614 #else 615 return -ENOSYS; 616 #endif 617 } 618 619 int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, 620 unsigned long *nbits) 621 { 622 #if defined(CONFIG_PTHREAD_AFFINITY_NP) 623 unsigned long tmpbits; 624 cpu_set_t *cpuset; 625 size_t setsize; 626 int i, err; 627 628 tmpbits = CPU_SETSIZE; 629 while (true) { 630 setsize = CPU_ALLOC_SIZE(tmpbits); 631 cpuset = CPU_ALLOC(tmpbits); 632 g_assert(cpuset); 633 634 err = pthread_getaffinity_np(thread->thread, setsize, cpuset); 635 if (err) { 636 CPU_FREE(cpuset); 637 if (err != -EINVAL) { 638 return err; 639 } 640 tmpbits *= 2; 641 } else { 642 break; 643 } 644 } 645 646 /* Convert the result into a proper bitmap. */ 647 *nbits = tmpbits; 648 *host_cpus = bitmap_new(tmpbits); 649 for (i = 0; i < tmpbits; i++) { 650 if (CPU_ISSET(i, cpuset)) { 651 set_bit(i, *host_cpus); 652 } 653 } 654 CPU_FREE(cpuset); 655 return 0; 656 #else 657 return -ENOSYS; 658 #endif 659 } 660 661 void qemu_thread_get_self(QemuThread *thread) 662 { 663 thread->thread = pthread_self(); 664 } 665 666 bool qemu_thread_is_self(QemuThread *thread) 667 { 668 return pthread_equal(pthread_self(), thread->thread); 669 } 670 671 void qemu_thread_exit(void *retval) 672 { 673 pthread_exit(retval); 674 } 675 676 void *qemu_thread_join(QemuThread *thread) 677 { 678 int err; 679 void *ret; 680 681 err = pthread_join(thread->thread, &ret); 682 if (err) { 683 error_exit(err, __func__); 684 } 685 return ret; 686 } 687