xref: /openbmc/qemu/util/qemu-thread-posix.c (revision aa09b3d5)
1 /*
2  * Wrappers around mutex/cond/thread functions
3  *
4  * Copyright Red Hat, Inc. 2009
5  *
6  * Author:
7  *  Marcelo Tosatti <mtosatti@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13 #include "qemu/osdep.h"
14 #include "qemu/thread.h"
15 #include "qemu/atomic.h"
16 #include "qemu/notify.h"
17 #include "qemu-thread-common.h"
18 #include "qemu/tsan.h"
19 #include "qemu/bitmap.h"
20 
/* Set by qemu_thread_naming(); consulted when a new thread starts up. */
static bool name_threads;

/*
 * Record whether threads should be given their debug name.
 *
 * @enable: true to request naming, false to turn it off.
 *
 * When the build has neither pthread_setname_np() flavour available and
 * naming is requested, a warning is printed to stderr; this is a debugging
 * aid, so the request is otherwise accepted silently.
 */
void qemu_thread_naming(bool enable)
{
#if !defined(CONFIG_PTHREAD_SETNAME_NP_W_TID) && \
    !defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
    /* Debugging option only: warn, do not fail. */
    if (enable) {
        fputs("qemu: thread naming not supported on this host\n", stderr);
    }
#endif

    name_threads = enable;
}
35 
/*
 * Print a fatal error to stderr and abort the process.
 *
 * @err: error number, rendered through strerror()
 * @msg: context prefix (callers in this file pass __func__)
 */
static void error_exit(int err, const char *msg)
{
    const char *reason = strerror(err);

    fprintf(stderr, "qemu: %s: %s\n", msg, reason);
    abort();
}
41 
/*
 * Clock used for timed waits: CLOCK_MONOTONIC when the build defines
 * CONFIG_PTHREAD_CONDATTR_SETCLOCK, CLOCK_REALTIME otherwise.
 */
static inline clockid_t qemu_timedwait_clockid(void)
{
    clockid_t id = CLOCK_REALTIME;

#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
    id = CLOCK_MONOTONIC;
#endif
    return id;
}
50 
/*
 * Fill @ts with the absolute time "now + @ms milliseconds", measured on
 * the clock returned by qemu_timedwait_clockid().
 *
 * @ts: output timespec
 * @ms: relative offset in milliseconds
 *
 * The result is always normalised so that 0 <= tv_nsec < 1000000000.
 * A negative @ms therefore yields a deadline in the past rather than a
 * timespec with a negative nanosecond field.
 */
static void compute_abs_deadline(struct timespec *ts, int ms)
{
    clock_gettime(qemu_timedwait_clockid(), ts);

    ts->tv_sec += ms / 1000;
    /* |ms % 1000| < 1000, so a single carry or borrow is sufficient. */
    ts->tv_nsec += (ms % 1000) * 1000000;

    if (ts->tv_nsec >= 1000000000) {
        ts->tv_sec++;
        ts->tv_nsec -= 1000000000;
    } else if (ts->tv_nsec < 0) {
        ts->tv_sec--;
        ts->tv_nsec += 1000000000;
    }
}
61 
62 void qemu_mutex_init(QemuMutex *mutex)
63 {
64     int err;
65 
66     err = pthread_mutex_init(&mutex->lock, NULL);
67     if (err)
68         error_exit(err, __func__);
69     qemu_mutex_post_init(mutex);
70 }
71 
72 void qemu_mutex_destroy(QemuMutex *mutex)
73 {
74     int err;
75 
76     assert(mutex->initialized);
77     mutex->initialized = false;
78     err = pthread_mutex_destroy(&mutex->lock);
79     if (err)
80         error_exit(err, __func__);
81 }
82 
83 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
84 {
85     int err;
86 
87     assert(mutex->initialized);
88     qemu_mutex_pre_lock(mutex, file, line);
89     err = pthread_mutex_lock(&mutex->lock);
90     if (err)
91         error_exit(err, __func__);
92     qemu_mutex_post_lock(mutex, file, line);
93 }
94 
95 int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
96 {
97     int err;
98 
99     assert(mutex->initialized);
100     err = pthread_mutex_trylock(&mutex->lock);
101     if (err == 0) {
102         qemu_mutex_post_lock(mutex, file, line);
103         return 0;
104     }
105     if (err != EBUSY) {
106         error_exit(err, __func__);
107     }
108     return -EBUSY;
109 }
110 
111 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
112 {
113     int err;
114 
115     assert(mutex->initialized);
116     qemu_mutex_pre_unlock(mutex, file, line);
117     err = pthread_mutex_unlock(&mutex->lock);
118     if (err)
119         error_exit(err, __func__);
120 }
121 
122 void qemu_rec_mutex_init(QemuRecMutex *mutex)
123 {
124     int err;
125     pthread_mutexattr_t attr;
126 
127     pthread_mutexattr_init(&attr);
128     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
129     err = pthread_mutex_init(&mutex->m.lock, &attr);
130     pthread_mutexattr_destroy(&attr);
131     if (err) {
132         error_exit(err, __func__);
133     }
134     mutex->m.initialized = true;
135 }
136 
137 void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
138 {
139     qemu_mutex_destroy(&mutex->m);
140 }
141 
142 void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
143 {
144     qemu_mutex_lock_impl(&mutex->m, file, line);
145 }
146 
147 int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
148 {
149     return qemu_mutex_trylock_impl(&mutex->m, file, line);
150 }
151 
152 void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
153 {
154     qemu_mutex_unlock_impl(&mutex->m, file, line);
155 }
156 
157 void qemu_cond_init(QemuCond *cond)
158 {
159     pthread_condattr_t attr;
160     int err;
161 
162     err = pthread_condattr_init(&attr);
163     if (err) {
164         error_exit(err, __func__);
165     }
166 #ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
167     err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid());
168     if (err) {
169         error_exit(err, __func__);
170     }
171 #endif
172     err = pthread_cond_init(&cond->cond, &attr);
173     if (err) {
174         error_exit(err, __func__);
175     }
176     err = pthread_condattr_destroy(&attr);
177     if (err) {
178         error_exit(err, __func__);
179     }
180     cond->initialized = true;
181 }
182 
183 void qemu_cond_destroy(QemuCond *cond)
184 {
185     int err;
186 
187     assert(cond->initialized);
188     cond->initialized = false;
189     err = pthread_cond_destroy(&cond->cond);
190     if (err)
191         error_exit(err, __func__);
192 }
193 
194 void qemu_cond_signal(QemuCond *cond)
195 {
196     int err;
197 
198     assert(cond->initialized);
199     err = pthread_cond_signal(&cond->cond);
200     if (err)
201         error_exit(err, __func__);
202 }
203 
204 void qemu_cond_broadcast(QemuCond *cond)
205 {
206     int err;
207 
208     assert(cond->initialized);
209     err = pthread_cond_broadcast(&cond->cond);
210     if (err)
211         error_exit(err, __func__);
212 }
213 
214 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
215 {
216     int err;
217 
218     assert(cond->initialized);
219     qemu_mutex_pre_unlock(mutex, file, line);
220     err = pthread_cond_wait(&cond->cond, &mutex->lock);
221     qemu_mutex_post_lock(mutex, file, line);
222     if (err)
223         error_exit(err, __func__);
224 }
225 
226 static bool
227 qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
228                        const char *file, const int line)
229 {
230     int err;
231 
232     assert(cond->initialized);
233     trace_qemu_mutex_unlock(mutex, file, line);
234     err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts);
235     trace_qemu_mutex_locked(mutex, file, line);
236     if (err && err != ETIMEDOUT) {
237         error_exit(err, __func__);
238     }
239     return err != ETIMEDOUT;
240 }
241 
242 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
243                               const char *file, const int line)
244 {
245     struct timespec ts;
246 
247     compute_abs_deadline(&ts, ms);
248     return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line);
249 }
250 
251 void qemu_sem_init(QemuSemaphore *sem, int init)
252 {
253     qemu_mutex_init(&sem->mutex);
254     qemu_cond_init(&sem->cond);
255 
256     if (init < 0) {
257         error_exit(EINVAL, __func__);
258     }
259     sem->count = init;
260 }
261 
262 void qemu_sem_destroy(QemuSemaphore *sem)
263 {
264     qemu_cond_destroy(&sem->cond);
265     qemu_mutex_destroy(&sem->mutex);
266 }
267 
268 void qemu_sem_post(QemuSemaphore *sem)
269 {
270     qemu_mutex_lock(&sem->mutex);
271     if (sem->count == UINT_MAX) {
272         error_exit(EINVAL, __func__);
273     } else {
274         sem->count++;
275         qemu_cond_signal(&sem->cond);
276     }
277     qemu_mutex_unlock(&sem->mutex);
278 }
279 
280 int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
281 {
282     bool rc = true;
283     struct timespec ts;
284 
285     compute_abs_deadline(&ts, ms);
286     qemu_mutex_lock(&sem->mutex);
287     while (sem->count == 0) {
288         if (ms == 0) {
289             rc = false;
290         } else {
291             rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts,
292                                         __FILE__, __LINE__);
293         }
294         if (!rc) { /* timeout */
295             break;
296         }
297     }
298     if (rc) {
299         --sem->count;
300     }
301     qemu_mutex_unlock(&sem->mutex);
302     return (rc ? 0 : -1);
303 }
304 
305 void qemu_sem_wait(QemuSemaphore *sem)
306 {
307     qemu_mutex_lock(&sem->mutex);
308     while (sem->count == 0) {
309         qemu_cond_wait(&sem->cond, &sem->mutex);
310     }
311     --sem->count;
312     qemu_mutex_unlock(&sem->mutex);
313 }
314 
315 #ifdef __linux__
316 #include "qemu/futex.h"
317 #else
318 static inline void qemu_futex_wake(QemuEvent *ev, int n)
319 {
320     assert(ev->initialized);
321     pthread_mutex_lock(&ev->lock);
322     if (n == 1) {
323         pthread_cond_signal(&ev->cond);
324     } else {
325         pthread_cond_broadcast(&ev->cond);
326     }
327     pthread_mutex_unlock(&ev->lock);
328 }
329 
330 static inline void qemu_futex_wait(QemuEvent *ev, unsigned val)
331 {
332     assert(ev->initialized);
333     pthread_mutex_lock(&ev->lock);
334     if (ev->value == val) {
335         pthread_cond_wait(&ev->cond, &ev->lock);
336     }
337     pthread_mutex_unlock(&ev->lock);
338 }
339 #endif
340 
341 /* Valid transitions:
342  * - free->set, when setting the event
343  * - busy->set, when setting the event, followed by qemu_futex_wake
344  * - set->free, when resetting the event
345  * - free->busy, when waiting
346  *
347  * set->busy does not happen (it can be observed from the outside but
348  * it really is set->free->busy).
349  *
350  * busy->free provably cannot happen; to enforce it, the set->free transition
351  * is done with an OR, which becomes a no-op if the event has concurrently
352  * transitioned to free or busy.
353  */
354 
/*
 * QemuEvent states stored in ev->value.  qemu_event_reset() moves
 * EV_SET to EV_FREE with an atomic OR of EV_FREE: OR-ing 1 into 0 gives
 * EV_FREE, while OR-ing it into EV_FREE (1) or EV_BUSY (-1) changes nothing.
 */
#define EV_SET         0
#define EV_FREE        1
#define EV_BUSY       -1
358 
359 void qemu_event_init(QemuEvent *ev, bool init)
360 {
361 #ifndef __linux__
362     pthread_mutex_init(&ev->lock, NULL);
363     pthread_cond_init(&ev->cond, NULL);
364 #endif
365 
366     ev->value = (init ? EV_SET : EV_FREE);
367     ev->initialized = true;
368 }
369 
370 void qemu_event_destroy(QemuEvent *ev)
371 {
372     assert(ev->initialized);
373     ev->initialized = false;
374 #ifndef __linux__
375     pthread_mutex_destroy(&ev->lock);
376     pthread_cond_destroy(&ev->cond);
377 #endif
378 }
379 
380 void qemu_event_set(QemuEvent *ev)
381 {
382     /* qemu_event_set has release semantics, but because it *loads*
383      * ev->value we need a full memory barrier here.
384      */
385     assert(ev->initialized);
386     smp_mb();
387     if (qatomic_read(&ev->value) != EV_SET) {
388         if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
389             /* There were waiters, wake them up.  */
390             qemu_futex_wake(ev, INT_MAX);
391         }
392     }
393 }
394 
395 void qemu_event_reset(QemuEvent *ev)
396 {
397     unsigned value;
398 
399     assert(ev->initialized);
400     value = qatomic_read(&ev->value);
401     smp_mb_acquire();
402     if (value == EV_SET) {
403         /*
404          * If there was a concurrent reset (or even reset+wait),
405          * do nothing.  Otherwise change EV_SET->EV_FREE.
406          */
407         qatomic_or(&ev->value, EV_FREE);
408     }
409 }
410 
411 void qemu_event_wait(QemuEvent *ev)
412 {
413     unsigned value;
414 
415     assert(ev->initialized);
416     value = qatomic_read(&ev->value);
417     smp_mb_acquire();
418     if (value != EV_SET) {
419         if (value == EV_FREE) {
420             /*
421              * Leave the event reset and tell qemu_event_set that there
422              * are waiters.  No need to retry, because there cannot be
423              * a concurrent busy->free transition.  After the CAS, the
424              * event will be either set or busy.
425              */
426             if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
427                 return;
428             }
429         }
430         qemu_futex_wait(ev, EV_BUSY);
431     }
432 }
433 
434 static __thread NotifierList thread_exit;
435 
436 /*
437  * Note that in this implementation you can register a thread-exit
438  * notifier for the main thread, but it will never be called.
439  * This is OK because main thread exit can only happen when the
440  * entire process is exiting, and the API allows notifiers to not
441  * be called on process exit.
442  */
443 void qemu_thread_atexit_add(Notifier *notifier)
444 {
445     notifier_list_add(&thread_exit, notifier);
446 }
447 
448 void qemu_thread_atexit_remove(Notifier *notifier)
449 {
450     notifier_remove(notifier);
451 }
452 
453 static void qemu_thread_atexit_notify(void *arg)
454 {
455     /*
456      * Called when non-main thread exits (via qemu_thread_exit()
457      * or by returning from its start routine.)
458      */
459     notifier_list_notify(&thread_exit, NULL);
460 }
461 
/*
 * Start-up parameters handed from qemu_thread_create() to
 * qemu_thread_start().  The structure and @name are heap-allocated by the
 * creator and freed by the new thread.
 */
typedef struct {
    void *(*start_routine)(void *);   /* user entry point */
    void *arg;                        /* argument passed to start_routine */
    char *name;                       /* private copy of the thread name */
} QemuThreadArgs;
467 
468 static void *qemu_thread_start(void *args)
469 {
470     QemuThreadArgs *qemu_thread_args = args;
471     void *(*start_routine)(void *) = qemu_thread_args->start_routine;
472     void *arg = qemu_thread_args->arg;
473     void *r;
474 
475     /* Attempt to set the threads name; note that this is for debug, so
476      * we're not going to fail if we can't set it.
477      */
478     if (name_threads && qemu_thread_args->name) {
479 # if defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
480         pthread_setname_np(pthread_self(), qemu_thread_args->name);
481 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
482         pthread_setname_np(qemu_thread_args->name);
483 # endif
484     }
485     QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
486     g_free(qemu_thread_args->name);
487     g_free(qemu_thread_args);
488 
489     /*
490      * GCC 11 with glibc 2.17 on PowerPC reports
491      *
492      * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes
493      *   in a region of size 528 [-Werror=stringop-overflow=]
494      * 540 |     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
495      *     |     ^~~~~~~~~~~~~~~~~~~~
496      *
497      * which is clearly nonsense.
498      */
499 #pragma GCC diagnostic push
500 #ifndef __clang__
501 #pragma GCC diagnostic ignored "-Wstringop-overflow"
502 #endif
503 
504     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
505     r = start_routine(arg);
506     pthread_cleanup_pop(1);
507 
508 #pragma GCC diagnostic pop
509 
510     return r;
511 }
512 
513 void qemu_thread_create(QemuThread *thread, const char *name,
514                        void *(*start_routine)(void*),
515                        void *arg, int mode)
516 {
517     sigset_t set, oldset;
518     int err;
519     pthread_attr_t attr;
520     QemuThreadArgs *qemu_thread_args;
521 
522     err = pthread_attr_init(&attr);
523     if (err) {
524         error_exit(err, __func__);
525     }
526 
527     if (mode == QEMU_THREAD_DETACHED) {
528         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
529     }
530 
531     /* Leave signal handling to the iothread.  */
532     sigfillset(&set);
533     /* Blocking the signals can result in undefined behaviour. */
534     sigdelset(&set, SIGSEGV);
535     sigdelset(&set, SIGFPE);
536     sigdelset(&set, SIGILL);
537     /* TODO avoid SIGBUS loss on macOS */
538     pthread_sigmask(SIG_SETMASK, &set, &oldset);
539 
540     qemu_thread_args = g_new0(QemuThreadArgs, 1);
541     qemu_thread_args->name = g_strdup(name);
542     qemu_thread_args->start_routine = start_routine;
543     qemu_thread_args->arg = arg;
544 
545     err = pthread_create(&thread->thread, &attr,
546                          qemu_thread_start, qemu_thread_args);
547 
548     if (err)
549         error_exit(err, __func__);
550 
551     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
552 
553     pthread_attr_destroy(&attr);
554 }
555 
556 int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
557                              unsigned long nbits)
558 {
559 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
560     const size_t setsize = CPU_ALLOC_SIZE(nbits);
561     unsigned long value;
562     cpu_set_t *cpuset;
563     int err;
564 
565     cpuset = CPU_ALLOC(nbits);
566     g_assert(cpuset);
567 
568     CPU_ZERO_S(setsize, cpuset);
569     value = find_first_bit(host_cpus, nbits);
570     while (value < nbits) {
571         CPU_SET_S(value, setsize, cpuset);
572         value = find_next_bit(host_cpus, nbits, value + 1);
573     }
574 
575     err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
576     CPU_FREE(cpuset);
577     return err;
578 #else
579     return -ENOSYS;
580 #endif
581 }
582 
583 int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
584                              unsigned long *nbits)
585 {
586 #if defined(CONFIG_PTHREAD_AFFINITY_NP)
587     unsigned long tmpbits;
588     cpu_set_t *cpuset;
589     size_t setsize;
590     int i, err;
591 
592     tmpbits = CPU_SETSIZE;
593     while (true) {
594         setsize = CPU_ALLOC_SIZE(tmpbits);
595         cpuset = CPU_ALLOC(tmpbits);
596         g_assert(cpuset);
597 
598         err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
599         if (err) {
600             CPU_FREE(cpuset);
601             if (err != -EINVAL) {
602                 return err;
603             }
604             tmpbits *= 2;
605         } else {
606             break;
607         }
608     }
609 
610     /* Convert the result into a proper bitmap. */
611     *nbits = tmpbits;
612     *host_cpus = bitmap_new(tmpbits);
613     for (i = 0; i < tmpbits; i++) {
614         if (CPU_ISSET(i, cpuset)) {
615             set_bit(i, *host_cpus);
616         }
617     }
618     CPU_FREE(cpuset);
619     return 0;
620 #else
621     return -ENOSYS;
622 #endif
623 }
624 
625 void qemu_thread_get_self(QemuThread *thread)
626 {
627     thread->thread = pthread_self();
628 }
629 
630 bool qemu_thread_is_self(QemuThread *thread)
631 {
632    return pthread_equal(pthread_self(), thread->thread);
633 }
634 
/*
 * Terminate the calling thread through pthread_exit().
 *
 * @retval: value handed to pthread_exit().
 */
void qemu_thread_exit(void *retval)
{
    void *result = retval;

    pthread_exit(result);
}
639 
640 void *qemu_thread_join(QemuThread *thread)
641 {
642     int err;
643     void *ret;
644 
645     err = pthread_join(thread->thread, &ret);
646     if (err) {
647         error_exit(err, __func__);
648     }
649     return ret;
650 }
651