xref: /openbmc/qemu/util/qemu-thread-posix.c (revision 51e47cf8)
1 /*
2  * Wrappers around mutex/cond/thread functions
3  *
4  * Copyright Red Hat, Inc. 2009
5  *
6  * Author:
7  *  Marcelo Tosatti <mtosatti@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/thread.h"
15 #include "qemu/atomic.h"
16 #include "qemu/notify.h"
17 #include "qemu-thread-common.h"
18 #include "qemu/tsan.h"
19 #include "qemu/bitmap.h"
20 
21 #ifdef CONFIG_PTHREAD_SET_NAME_NP
22 #include <pthread_np.h>
23 #endif
24 
/* True when qemu_thread_start() should try to name new threads. */
static bool name_threads;

/*
 * Record whether threads should be named when they start.
 *
 * @enable: true to request thread naming, false to turn it off.
 *
 * If the build has none of the thread-naming primitives configured,
 * a request to enable naming only produces a warning on stderr.
 */
void qemu_thread_naming(bool enable)
{
    name_threads = enable;

#if !(defined(CONFIG_PTHREAD_SETNAME_NP_W_TID) || \
      defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID) || \
      defined(CONFIG_PTHREAD_SET_NAME_NP))
    /* Naming is purely a debugging aid, so warn rather than fail. */
    if (enable) {
        fprintf(stderr, "qemu: thread naming not supported on this host\n");
    }
#endif
}
40 
/*
 * Print a fatal threading error to stderr and abort the process.
 *
 * @err: errno-style error number, rendered with strerror().
 * @msg: text printed before the error description (callers in this
 *       file pass __func__).
 */
static void error_exit(int err, const char *msg)
{
    const char *reason = strerror(err);

    fprintf(stderr, "qemu: %s: %s\n", msg, reason);
    abort();
}
46 
/*
 * Return the clock against which timed-wait deadlines are computed:
 * CLOCK_MONOTONIC when CONFIG_PTHREAD_CONDATTR_SETCLOCK is defined,
 * CLOCK_REALTIME otherwise.
 */
static inline clockid_t qemu_timedwait_clockid(void)
{
    clockid_t clk;

#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
    clk = CLOCK_MONOTONIC;
#else
    clk = CLOCK_REALTIME;
#endif
    return clk;
}
55 
/*
 * Fill @ts with the absolute time that lies @ms milliseconds after "now"
 * on the clock returned by qemu_timedwait_clockid().
 *
 * @ts: output timespec.
 * @ms: relative timeout in milliseconds.
 */
static void compute_abs_deadline(struct timespec *ts, int ms)
{
    const int whole_sec = ms / 1000;
    const int extra_nsec = (ms % 1000) * 1000000;

    clock_gettime(qemu_timedwait_clockid(), ts);
    ts->tv_sec += whole_sec;
    ts->tv_nsec += extra_nsec;

    /* Carry a nanosecond overflow into the seconds field. */
    if (ts->tv_nsec >= 1000000000) {
        ts->tv_nsec -= 1000000000;
        ts->tv_sec++;
    }
}
66 
67 void qemu_mutex_init(QemuMutex *mutex)
68 {
69     int err;
70 
71     err = pthread_mutex_init(&mutex->lock, NULL);
72     if (err)
73         error_exit(err, __func__);
74     qemu_mutex_post_init(mutex);
75 }
76 
77 void qemu_mutex_destroy(QemuMutex *mutex)
78 {
79     int err;
80 
81     assert(mutex->initialized);
82     mutex->initialized = false;
83     err = pthread_mutex_destroy(&mutex->lock);
84     if (err)
85         error_exit(err, __func__);
86 }
87 
88 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
89 {
90     int err;
91 
92     assert(mutex->initialized);
93     qemu_mutex_pre_lock(mutex, file, line);
94     err = pthread_mutex_lock(&mutex->lock);
95     if (err)
96         error_exit(err, __func__);
97     qemu_mutex_post_lock(mutex, file, line);
98 }
99 
100 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
101 {
102     int err;
103 
104     assert(mutex->initialized);
105     err = pthread_mutex_trylock(&mutex->lock);
106     if (err == 0) {
107         qemu_mutex_post_lock(mutex, file, line);
108         return 0;
109     }
110     if (err != EBUSY) {
111         error_exit(err, __func__);
112     }
113     return -EBUSY;
114 }
115 
116 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
117 {
118     int err;
119 
120     assert(mutex->initialized);
121     qemu_mutex_pre_unlock(mutex, file, line);
122     err = pthread_mutex_unlock(&mutex->lock);
123     if (err)
124         error_exit(err, __func__);
125 }
126 
127 void qemu_rec_mutex_init(QemuRecMutex *mutex)
128 {
129     int err;
130     pthread_mutexattr_t attr;
131 
132     pthread_mutexattr_init(&attr);
133     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
134     err = pthread_mutex_init(&mutex->m.lock, &attr);
135     pthread_mutexattr_destroy(&attr);
136     if (err) {
137         error_exit(err, __func__);
138     }
139     mutex->m.initialized = true;
140 }
141 
142 void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
143 {
144     qemu_mutex_destroy(&mutex->m);
145 }
146 
147 void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
148 {
149     qemu_mutex_lock_impl(&mutex->m, file, line);
150 }
151 
152 int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
153 {
154     return qemu_mutex_trylock_impl(&mutex->m, file, line);
155 }
156 
157 void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
158 {
159     qemu_mutex_unlock_impl(&mutex->m, file, line);
160 }
161 
162 void qemu_cond_init(QemuCond *cond)
163 {
164     pthread_condattr_t attr;
165     int err;
166 
167     err = pthread_condattr_init(&attr);
168     if (err) {
169         error_exit(err, __func__);
170     }
171 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
172     err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid());
173     if (err) {
174         error_exit(err, __func__);
175     }
176 #endif
177     err = pthread_cond_init(&cond->cond, &attr);
178     if (err) {
179         error_exit(err, __func__);
180     }
181     err = pthread_condattr_destroy(&attr);
182     if (err) {
183         error_exit(err, __func__);
184     }
185     cond->initialized = true;
186 }
187 
188 void qemu_cond_destroy(QemuCond *cond)
189 {
190     int err;
191 
192     assert(cond->initialized);
193     cond->initialized = false;
194     err = pthread_cond_destroy(&cond->cond);
195     if (err)
196         error_exit(err, __func__);
197 }
198 
199 void qemu_cond_signal(QemuCond *cond)
200 {
201     int err;
202 
203     assert(cond->initialized);
204     err = pthread_cond_signal(&cond->cond);
205     if (err)
206         error_exit(err, __func__);
207 }
208 
209 void qemu_cond_broadcast(QemuCond *cond)
210 {
211     int err;
212 
213     assert(cond->initialized);
214     err = pthread_cond_broadcast(&cond->cond);
215     if (err)
216         error_exit(err, __func__);
217 }
218 
219 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
220 {
221     int err;
222 
223     assert(cond->initialized);
224     qemu_mutex_pre_unlock(mutex, file, line);
225     err = pthread_cond_wait(&cond->cond, &mutex->lock);
226     qemu_mutex_post_lock(mutex, file, line);
227     if (err)
228         error_exit(err, __func__);
229 }
230 
231 static bool TSA_NO_TSA
232 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
233                        const char *file, const int line)
234 {
235     int err;
236 
237     assert(cond->initialized);
238     trace_qemu_mutex_unlock(mutex, file, line);
239     err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts);
240     trace_qemu_mutex_locked(mutex, file, line);
241     if (err && err != ETIMEDOUT) {
242         error_exit(err, __func__);
243     }
244     return err != ETIMEDOUT;
245 }
246 
247 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
248                               const char *file, const int line)
249 {
250     struct timespec ts;
251 
252     compute_abs_deadline(&ts, ms);
253     return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line);
254 }
255 
256 void qemu_sem_init(QemuSemaphore *sem, int init)
257 {
258     qemu_mutex_init(&sem->mutex);
259     qemu_cond_init(&sem->cond);
260 
261     if (init < 0) {
262         error_exit(EINVAL, __func__);
263     }
264     sem->count = init;
265 }
266 
267 void qemu_sem_destroy(QemuSemaphore *sem)
268 {
269     qemu_cond_destroy(&sem->cond);
270     qemu_mutex_destroy(&sem->mutex);
271 }
272 
273 void qemu_sem_post(QemuSemaphore *sem)
274 {
275     qemu_mutex_lock(&sem->mutex);
276     if (sem->count == UINT_MAX) {
277         error_exit(EINVAL, __func__);
278     } else {
279         sem->count++;
280         qemu_cond_signal(&sem->cond);
281     }
282     qemu_mutex_unlock(&sem->mutex);
283 }
284 
285 int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
286 {
287     bool rc = true;
288     struct timespec ts;
289 
290     compute_abs_deadline(&ts, ms);
291     qemu_mutex_lock(&sem->mutex);
292     while (sem->count == 0) {
293         if (ms == 0) {
294             rc = false;
295         } else {
296             rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts,
297                                         __FILE__, __LINE__);
298         }
299         if (!rc) { /* timeout */
300             break;
301         }
302     }
303     if (rc) {
304         --sem->count;
305     }
306     qemu_mutex_unlock(&sem->mutex);
307     return (rc ? 0 : -1);
308 }
309 
310 void qemu_sem_wait(QemuSemaphore *sem)
311 {
312     qemu_mutex_lock(&sem->mutex);
313     while (sem->count == 0) {
314         qemu_cond_wait(&sem->cond, &sem->mutex);
315     }
316     --sem->count;
317     qemu_mutex_unlock(&sem->mutex);
318 }
319 
320 #ifdef __linux__
321 #include "qemu/futex.h"
322 #else
323 static inline void qemu_futex_wake(QemuEvent *ev, int n)
324 {
325     assert(ev->initialized);
326     pthread_mutex_lock(&ev->lock);
327     if (n == 1) {
328         pthread_cond_signal(&ev->cond);
329     } else {
330         pthread_cond_broadcast(&ev->cond);
331     }
332     pthread_mutex_unlock(&ev->lock);
333 }
334 
335 static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
336 {
337     assert(ev->initialized);
338     pthread_mutex_lock(&ev->lock);
339     if (ev->value == val) {
340         pthread_cond_wait(&ev->cond, &ev->lock);
341     }
342     pthread_mutex_unlock(&ev->lock);
343 }
344 #endif
345 
346 /* Valid transitions:
347  * - free->set, when setting the event
348  * - busy->set, when setting the event, followed by qemu_futex_wake
349  * - set->free, when resetting the event
350  * - free->busy, when waiting
351  *
352  * set->busy does not happen (it can be observed from the outside but
353  * it really is set->free->busy).
354  *
355  * busy->free provably cannot happen; to enforce it, the set->free transition
356  * is done with an OR, which becomes a no-op if the event has concurrently
357  * transitioned to free or busy.
358  */
359 
/*
 * Values stored in QemuEvent.value.  With this encoding, OR-ing EV_FREE
 * into the value turns EV_SET (0) into EV_FREE (1) and leaves both
 * EV_FREE (1) and EV_BUSY (-1, all bits set) unchanged.
 */
#define EV_SET         0
#define EV_FREE        1
#define EV_BUSY       -1
363 
364 void qemu_event_init(QemuEvent *ev, bool init)
365 {
366 #ifndef __linux__
367     pthread_mutex_init(&ev->lock, NULL);
368     pthread_cond_init(&ev->cond, NULL);
369 #endif
370 
371     ev->value = (init ? EV_SET : EV_FREE);
372     ev->initialized = true;
373 }
374 
/*
 * Destroy the event @ev, which must have been initialized.
 *
 * When not building for Linux, the mutex and condition variable that back
 * the fallback futex helpers are destroyed as well.
 */
void qemu_event_destroy(QemuEvent *ev)
{
    assert(ev->initialized);
    /* Mark the event unusable before tearing down its resources. */
    ev->initialized = false;
#ifndef __linux__
    pthread_mutex_destroy(&ev->lock);
    pthread_cond_destroy(&ev->cond);
#endif
}
384 
/*
 * Put the event @ev into the EV_SET state.
 *
 * If the previous state was EV_BUSY, waiters are woken with
 * qemu_futex_wake().  If the event is already EV_SET, nothing is written.
 */
void qemu_event_set(QemuEvent *ev)
{
    assert(ev->initialized);

    /*
     * Pairs with both qemu_event_reset() and qemu_event_wait().
     *
     * qemu_event_set has release semantics, but because it *loads*
     * ev->value we need a full memory barrier here.
     */
    smp_mb();
    /* Skip the atomic exchange when the event is already set. */
    if (qatomic_read(&ev->value) != EV_SET) {
        int old = qatomic_xchg(&ev->value, EV_SET);

        /* Pairs with memory barrier in kernel futex_wait system call.  */
        smp_mb__after_rmw();
        if (old == EV_BUSY) {
            /* There were waiters, wake them up.  */
            qemu_futex_wake(ev, INT_MAX);
        }
    }
}
407 
/*
 * Reset the event @ev: an EV_SET event becomes EV_FREE, while an event
 * that is already EV_FREE or EV_BUSY is left unchanged.
 */
void qemu_event_reset(QemuEvent *ev)
{
    assert(ev->initialized);

    /*
     * If there was a concurrent reset (or even reset+wait),
     * do nothing.  Otherwise change EV_SET->EV_FREE.
     */
    qatomic_or(&ev->value, EV_FREE);

    /*
     * Order reset before checking the condition in the caller.
     * Pairs with the first memory barrier in qemu_event_set().
     */
    smp_mb__after_rmw();
}
424 
/*
 * Wait for the event @ev to be set.
 *
 * Returns immediately if the event is observed as EV_SET.  Otherwise the
 * event is marked EV_BUSY (if it was EV_FREE) so that qemu_event_set()
 * knows there are waiters, and the caller sleeps in qemu_futex_wait().
 */
void qemu_event_wait(QemuEvent *ev)
{
    unsigned value;

    assert(ev->initialized);

    /*
     * qemu_event_wait must synchronize with qemu_event_set even if it does
     * not go down the slow path, so this load-acquire is needed that
     * synchronizes with the first memory barrier in qemu_event_set().
     *
     * If we do go down the slow path, there is no requirement at all: we
     * might miss a qemu_event_set() here but ultimately the memory barrier in
     * qemu_futex_wait() will ensure the check is done correctly.
     */
    value = qatomic_load_acquire(&ev->value);
    if (value != EV_SET) {
        if (value == EV_FREE) {
            /*
             * Leave the event reset and tell qemu_event_set that there are
             * waiters.  No need to retry, because there cannot be a concurrent
             * busy->free transition.  After the CAS, the event will be either
             * set or busy.
             *
             * This cmpxchg doesn't have particular ordering requirements if it
             * succeeds (moving the store earlier can only cause qemu_event_set()
             * to issue _more_ wakeups); the failing case needs acquire semantics
             * like the load above.
             */
            if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
                /* The event was set concurrently; nothing to wait for. */
                return;
            }
        }

        /*
         * This is the final check for a concurrent set, so it does need
         * a smp_mb() pairing with the second barrier of qemu_event_set().
         * The barrier is inside the FUTEX_WAIT system call.
         */
        qemu_futex_wait(ev, EV_BUSY);
    }
}
467 
/* Per-thread list of notifiers fired by qemu_thread_atexit_notify(). */
static __thread NotifierList thread_exit;
469 
/*
 * Register @notifier on the calling thread's thread-exit notifier list.
 *
 * Note that in this implementation you can register a thread-exit
 * notifier for the main thread, but it will never be called.
 * This is OK because main thread exit can only happen when the
 * entire process is exiting, and the API allows notifiers to not
 * be called on process exit.
 */
void qemu_thread_atexit_add(Notifier *notifier)
{
    notifier_list_add(&thread_exit, notifier);
}
481 
/* Unregister @notifier by passing it to notifier_remove(). */
void qemu_thread_atexit_remove(Notifier *notifier)
{
    notifier_remove(notifier);
}
486 
/*
 * pthread cleanup handler that fires the current thread's thread-exit
 * notifiers.  @arg is unused.
 */
static void qemu_thread_atexit_notify(void *arg)
{
    /*
     * Called when non-main thread exits (via qemu_thread_exit()
     * or by returning from its start routine.)
     */
    notifier_list_notify(&thread_exit, NULL);
}
495 
/*
 * Start-up parameters handed from qemu_thread_create() to
 * qemu_thread_start().  The structure and @name are heap-allocated by
 * the creator and freed by the new thread.
 */
typedef struct {
    void *(*start_routine)(void *);  /* thread entry point */
    void *arg;                       /* argument passed to start_routine */
    char *name;                      /* copy of the requested thread name */
} QemuThreadArgs;
501 
/*
 * Entry point given to pthread_create() by qemu_thread_create().
 *
 * @args: a QemuThreadArgs; it and its name string are freed here before
 *        the start routine runs.
 *
 * Optionally names the thread, then runs the start routine with
 * qemu_thread_atexit_notify() installed as a cleanup handler.
 * Returns whatever the start routine returns.
 */
static void *qemu_thread_start(void *args)
{
    QemuThreadArgs *qemu_thread_args = args;
    /* Copy out what is needed later; the args block is freed below. */
    void *(*start_routine)(void *) = qemu_thread_args->start_routine;
    void *arg = qemu_thread_args->arg;
    void *r;

    /* Attempt to set the threads name; note that this is for debug, so
     * we're not going to fail if we can't set it.
     */
    if (name_threads && qemu_thread_args->name) {
# if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
        pthread_setname_np(pthread_self(), qemu_thread_args->name);
# elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
        pthread_setname_np(qemu_thread_args->name);
# elif defined(CONFIG_PTHREAD_SET_NAME_NP)
        pthread_set_name_np(pthread_self(), qemu_thread_args->name);
# endif
    }
    QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
    g_free(qemu_thread_args->name);
    g_free(qemu_thread_args);

    /*
     * GCC 11 with glibc 2.17 on PowerPC reports
     *
     * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes
     *   in a region of size 528 [-Werror=stringop-overflow=]
     * 540 |     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
     *     |     ^~~~~~~~~~~~~~~~~~~~
     *
     * which is clearly nonsense.
     */
#pragma GCC diagnostic push
#ifndef __clang__
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif

    pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
    r = start_routine(arg);
    /* Non-zero argument: run the handler now that the routine returned. */
    pthread_cleanup_pop(1);

#pragma GCC diagnostic pop

    return r;
}
548 
549 void qemu_thread_create(QemuThread *thread, const char *name,
550                        void *(*start_routine)(void*),
551                        void *arg, int mode)
552 {
553     sigset_t set, oldset;
554     int err;
555     pthread_attr_t attr;
556     QemuThreadArgs *qemu_thread_args;
557 
558     err = pthread_attr_init(&attr);
559     if (err) {
560         error_exit(err, __func__);
561     }
562 
563     if (mode == QEMU_THREAD_DETACHED) {
564         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
565     }
566 
567     /* Leave signal handling to the iothread.  */
568     sigfillset(&set);
569     /* Blocking the signals can result in undefined behaviour. */
570     sigdelset(&set, SIGSEGV);
571     sigdelset(&set, SIGFPE);
572     sigdelset(&set, SIGILL);
573     /* TODO avoid SIGBUS loss on macOS */
574     pthread_sigmask(SIG_SETMASK, &set, &oldset);
575 
576     qemu_thread_args = g_new0(QemuThreadArgs, 1);
577     qemu_thread_args->name = g_strdup(name);
578     qemu_thread_args->start_routine = start_routine;
579     qemu_thread_args->arg = arg;
580 
581     err = pthread_create(&thread->thread, &attr,
582                          qemu_thread_start, qemu_thread_args);
583 
584     if (err)
585         error_exit(err, __func__);
586 
587     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
588 
589     pthread_attr_destroy(&attr);
590 }
591 
592 int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
593                              unsigned long nbits)
594 {
595 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
596     const size_t setsize = CPU_ALLOC_SIZE(nbits);
597     unsigned long value;
598     cpu_set_t *cpuset;
599     int err;
600 
601     cpuset = CPU_ALLOC(nbits);
602     g_assert(cpuset);
603 
604     CPU_ZERO_S(setsize, cpuset);
605     value = find_first_bit(host_cpus, nbits);
606     while (value < nbits) {
607         CPU_SET_S(value, setsize, cpuset);
608         value = find_next_bit(host_cpus, nbits, value + 1);
609     }
610 
611     err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
612     CPU_FREE(cpuset);
613     return err;
614 #else
615     return -ENOSYS;
616 #endif
617 }
618 
619 int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
620                              unsigned long *nbits)
621 {
622 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
623     unsigned long tmpbits;
624     cpu_set_t *cpuset;
625     size_t setsize;
626     int i, err;
627 
628     tmpbits = CPU_SETSIZE;
629     while (true) {
630         setsize = CPU_ALLOC_SIZE(tmpbits);
631         cpuset = CPU_ALLOC(tmpbits);
632         g_assert(cpuset);
633 
634         err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
635         if (err) {
636             CPU_FREE(cpuset);
637             if (err != -EINVAL) {
638                 return err;
639             }
640             tmpbits *= 2;
641         } else {
642             break;
643         }
644     }
645 
646     /* Convert the result into a proper bitmap. */
647     *nbits = tmpbits;
648     *host_cpus = bitmap_new(tmpbits);
649     for (i = 0; i < tmpbits; i++) {
650         if (CPU_ISSET(i, cpuset)) {
651             set_bit(i, *host_cpus);
652         }
653     }
654     CPU_FREE(cpuset);
655     return 0;
656 #else
657     return -ENOSYS;
658 #endif
659 }
660 
661 void qemu_thread_get_self(QemuThread *thread)
662 {
663     thread->thread = pthread_self();
664 }
665 
666 bool qemu_thread_is_self(QemuThread *thread)
667 {
668    return pthread_equal(pthread_self(), thread->thread);
669 }
670 
/* Terminate the calling thread via pthread_exit(), passing @retval. */
void qemu_thread_exit(void *retval)
{
    pthread_exit(retval);
}
675 
676 void *qemu_thread_join(QemuThread *thread)
677 {
678     int err;
679     void *ret;
680 
681     err = pthread_join(thread->thread, &ret);
682     if (err) {
683         error_exit(err, __func__);
684     }
685     return ret;
686 }
687