xref: /openbmc/qemu/util/qemu-coroutine.c (revision 6328d8ffa6cb9d750e4bfcfd73ac25d3a39ceb63)
1 /*
2  * QEMU coroutines
3  *
4  * Copyright IBM, Corp. 2011
5  *
6  * Authors:
7  *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
8  *  Kevin Wolf         <kwolf@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
11  * See the COPYING.LIB file in the top-level directory.
12  *
13  */
14 
15 #include "qemu/osdep.h"
16 #include "trace.h"
17 #include "qemu/thread.h"
18 #include "qemu/atomic.h"
19 #include "qemu/coroutine_int.h"
20 #include "qemu/coroutine-tls.h"
21 #include "qemu/cutils.h"
22 #include "block/aio.h"
23 
/* Capacity of a single CoroutinePoolBatch, counted in coroutines */
enum {
    COROUTINE_POOL_BATCH_MAX_SIZE = 128,
};
27 
28 /*
29  * Coroutine creation and deletion is expensive so a pool of unused coroutines
30  * is kept as a cache. When the pool has coroutines available, they are
31  * recycled instead of creating new ones from scratch. Coroutines are added to
32  * the pool upon termination.
33  *
34  * The pool is global but each thread maintains a small local pool to avoid
35  * global pool contention. Threads fetch and return batches of coroutines from
36  * the global pool to maintain their local pool. The local pool holds up to two
37  * batches whereas the maximum size of the global pool is controlled by the
38  * qemu_coroutine_inc_pool_size() API.
39  *
40  * .-----------------------------------.
41  * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
42  * `-----------------------------------'
43  *
44  * .-------------------.
45  * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
46  * `-------------------'
47  */
48 typedef struct CoroutinePoolBatch {
49     /* Batches are kept in a list */
50     QSLIST_ENTRY(CoroutinePoolBatch) next;
51 
52     /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
53     QSLIST_HEAD(, Coroutine) list;
54     unsigned int size;
55 } CoroutinePoolBatch;
56 
57 typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool;
58 
59 /* Host operating system limit on number of pooled coroutines */
60 static unsigned int global_pool_hard_max_size;
61 
62 static QemuMutex global_pool_lock; /* protects the following variables */
63 static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool);
64 static unsigned int global_pool_size;
65 static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE;
66 
67 QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool);
68 QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier);
69 
70 static CoroutinePoolBatch *coroutine_pool_batch_new(void)
71 {
72     CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1);
73 
74     QSLIST_INIT(&batch->list);
75     batch->size = 0;
76     return batch;
77 }
78 
79 static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch)
80 {
81     Coroutine *co;
82     Coroutine *tmp;
83 
84     QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) {
85         QSLIST_REMOVE_HEAD(&batch->list, pool_next);
86         qemu_coroutine_delete(co);
87     }
88     g_free(batch);
89 }
90 
91 static void local_pool_cleanup(Notifier *n, void *value)
92 {
93     CoroutinePool *local_pool = get_ptr_local_pool();
94     CoroutinePoolBatch *batch;
95     CoroutinePoolBatch *tmp;
96 
97     QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) {
98         QSLIST_REMOVE_HEAD(local_pool, next);
99         coroutine_pool_batch_delete(batch);
100     }
101 }
102 
103 /* Ensure the atexit notifier is registered */
104 static void local_pool_cleanup_init_once(void)
105 {
106     Notifier *notifier = get_ptr_local_pool_cleanup_notifier();
107     if (!notifier->notify) {
108         notifier->notify = local_pool_cleanup;
109         qemu_thread_atexit_add(notifier);
110     }
111 }
112 
113 /* Helper to get the next unused coroutine from the local pool */
114 static Coroutine *coroutine_pool_get_local(void)
115 {
116     CoroutinePool *local_pool = get_ptr_local_pool();
117     CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
118     Coroutine *co;
119 
120     if (unlikely(!batch)) {
121         return NULL;
122     }
123 
124     co = QSLIST_FIRST(&batch->list);
125     QSLIST_REMOVE_HEAD(&batch->list, pool_next);
126     batch->size--;
127 
128     if (batch->size == 0) {
129         QSLIST_REMOVE_HEAD(local_pool, next);
130         coroutine_pool_batch_delete(batch);
131     }
132     return co;
133 }
134 
135 /* Get the next batch from the global pool */
136 static void coroutine_pool_refill_local(void)
137 {
138     CoroutinePool *local_pool = get_ptr_local_pool();
139     CoroutinePoolBatch *batch;
140 
141     WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
142         batch = QSLIST_FIRST(&global_pool);
143 
144         if (batch) {
145             QSLIST_REMOVE_HEAD(&global_pool, next);
146             global_pool_size -= batch->size;
147         }
148     }
149 
150     if (batch) {
151         QSLIST_INSERT_HEAD(local_pool, batch, next);
152         local_pool_cleanup_init_once();
153     }
154 }
155 
156 /* Add a batch of coroutines to the global pool */
157 static void coroutine_pool_put_global(CoroutinePoolBatch *batch)
158 {
159     WITH_QEMU_LOCK_GUARD(&global_pool_lock) {
160         unsigned int max = MIN(global_pool_max_size,
161                                global_pool_hard_max_size);
162 
163         if (global_pool_size < max) {
164             QSLIST_INSERT_HEAD(&global_pool, batch, next);
165 
166             /* Overshooting the max pool size is allowed */
167             global_pool_size += batch->size;
168             return;
169         }
170     }
171 
172     /* The global pool was full, so throw away this batch */
173     coroutine_pool_batch_delete(batch);
174 }
175 
176 /* Get the next unused coroutine from the pool or return NULL */
177 static Coroutine *coroutine_pool_get(void)
178 {
179     Coroutine *co;
180 
181     co = coroutine_pool_get_local();
182     if (!co) {
183         coroutine_pool_refill_local();
184         co = coroutine_pool_get_local();
185     }
186     return co;
187 }
188 
189 static void coroutine_pool_put(Coroutine *co)
190 {
191     CoroutinePool *local_pool = get_ptr_local_pool();
192     CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool);
193 
194     if (unlikely(!batch)) {
195         batch = coroutine_pool_batch_new();
196         QSLIST_INSERT_HEAD(local_pool, batch, next);
197         local_pool_cleanup_init_once();
198     }
199 
200     if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) {
201         CoroutinePoolBatch *next = QSLIST_NEXT(batch, next);
202 
203         /* Is the local pool full? */
204         if (next) {
205             QSLIST_REMOVE_HEAD(local_pool, next);
206             coroutine_pool_put_global(batch);
207         }
208 
209         batch = coroutine_pool_batch_new();
210         QSLIST_INSERT_HEAD(local_pool, batch, next);
211     }
212 
213     QSLIST_INSERT_HEAD(&batch->list, co, pool_next);
214     batch->size++;
215 }
216 
217 Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
218 {
219     Coroutine *co = NULL;
220 
221     if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
222         co = coroutine_pool_get();
223     }
224 
225     if (!co) {
226         co = qemu_coroutine_new();
227     }
228 
229     co->entry = entry;
230     co->entry_arg = opaque;
231     QSIMPLEQ_INIT(&co->co_queue_wakeup);
232     return co;
233 }
234 
235 static void coroutine_delete(Coroutine *co)
236 {
237     co->caller = NULL;
238 
239     if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
240         coroutine_pool_put(co);
241     } else {
242         qemu_coroutine_delete(co);
243     }
244 }
245 
246 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
247 {
248     QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending);
249     Coroutine *from = qemu_coroutine_self();
250 
251     QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next);
252 
253     /* Run co and any queued coroutines */
254     while (!QSIMPLEQ_EMPTY(&pending)) {
255         Coroutine *to = QSIMPLEQ_FIRST(&pending);
256         CoroutineAction ret;
257 
258         /*
259          * Read to before to->scheduled; pairs with qatomic_cmpxchg in
260          * qemu_co_sleep(), aio_co_schedule() etc.
261          */
262         smp_read_barrier_depends();
263 
264         const char *scheduled = qatomic_read(&to->scheduled);
265 
266         QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
267 
268         trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg);
269 
270         /* if the Coroutine has already been scheduled, entering it again will
271          * cause us to enter it twice, potentially even after the coroutine has
272          * been deleted */
273         if (scheduled) {
274             fprintf(stderr,
275                     "%s: Co-routine was already scheduled in '%s'\n",
276                     __func__, scheduled);
277             abort();
278         }
279 
280         if (to->caller) {
281             fprintf(stderr, "Co-routine re-entered recursively\n");
282             abort();
283         }
284 
285         to->caller = from;
286         to->ctx = ctx;
287 
288         /* Store to->ctx before anything that stores to.  Matches
289          * barrier in aio_co_wake and qemu_co_mutex_wake.
290          */
291         smp_wmb();
292 
293         ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER);
294 
295         /* Queued coroutines are run depth-first; previously pending coroutines
296          * run after those queued more recently.
297          */
298         QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup);
299 
300         switch (ret) {
301         case COROUTINE_YIELD:
302             break;
303         case COROUTINE_TERMINATE:
304             assert(!to->locks_held);
305             trace_qemu_coroutine_terminate(to);
306             coroutine_delete(to);
307             break;
308         default:
309             abort();
310         }
311     }
312 }
313 
314 void qemu_coroutine_enter(Coroutine *co)
315 {
316     qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
317 }
318 
319 void qemu_coroutine_enter_if_inactive(Coroutine *co)
320 {
321     if (!qemu_coroutine_entered(co)) {
322         qemu_coroutine_enter(co);
323     }
324 }
325 
326 void coroutine_fn qemu_coroutine_yield(void)
327 {
328     Coroutine *self = qemu_coroutine_self();
329     Coroutine *to = self->caller;
330 
331     trace_qemu_coroutine_yield(self, to);
332 
333     if (!to) {
334         fprintf(stderr, "Co-routine is yielding to no one\n");
335         abort();
336     }
337 
338     self->caller = NULL;
339     qemu_coroutine_switch(self, to, COROUTINE_YIELD);
340 }
341 
342 bool qemu_coroutine_entered(Coroutine *co)
343 {
344     return co->caller;
345 }
346 
347 AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
348 {
349     return co->ctx;
350 }
351 
352 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
353 {
354     QEMU_LOCK_GUARD(&global_pool_lock);
355     global_pool_max_size += additional_pool_size;
356 }
357 
358 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
359 {
360     QEMU_LOCK_GUARD(&global_pool_lock);
361     global_pool_max_size -= removing_pool_size;
362 }
363 
364 static unsigned int get_global_pool_hard_max_size(void)
365 {
366 #ifdef __linux__
367     g_autofree char *contents = NULL;
368     int max_map_count;
369 
370     /*
371      * Linux processes can have up to max_map_count virtual memory areas
372      * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
373      * must limit the coroutine pool to a safe size to avoid running out of
374      * VMAs.
375      */
376     if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL,
377                             NULL) &&
378         qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) {
379         /*
380          * This is an upper bound that avoids exceeding max_map_count. Leave a
381          * fixed amount for non-coroutine users like library dependencies,
382          * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
383          * remaining amount.
384          */
385         if (max_map_count > 5000) {
386             return (max_map_count - 5000) / 2;
387         } else {
388             /* Disable the global pool but threads still have local pools */
389             return 0;
390         }
391     }
392 #endif
393 
394     return UINT_MAX;
395 }
396 
397 static void __attribute__((constructor)) qemu_coroutine_init(void)
398 {
399     qemu_mutex_init(&global_pool_lock);
400     global_pool_hard_max_size = get_global_pool_hard_max_size();
401 }
402