1 /* 2 * QEMU coroutines 3 * 4 * Copyright IBM, Corp. 2011 5 * 6 * Authors: 7 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> 8 * Kevin Wolf <kwolf@redhat.com> 9 * 10 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 11 * See the COPYING.LIB file in the top-level directory. 12 * 13 */ 14 15 #include "qemu/osdep.h" 16 #include "trace.h" 17 #include "qemu/thread.h" 18 #include "qemu/atomic.h" 19 #include "qemu/coroutine_int.h" 20 #include "qemu/coroutine-tls.h" 21 #include "qemu/cutils.h" 22 #include "block/aio.h" 23 24 enum { 25 COROUTINE_POOL_BATCH_MAX_SIZE = 128, 26 }; 27 28 /* 29 * Coroutine creation and deletion is expensive so a pool of unused coroutines 30 * is kept as a cache. When the pool has coroutines available, they are 31 * recycled instead of creating new ones from scratch. Coroutines are added to 32 * the pool upon termination. 33 * 34 * The pool is global but each thread maintains a small local pool to avoid 35 * global pool contention. Threads fetch and return batches of coroutines from 36 * the global pool to maintain their local pool. The local pool holds up to two 37 * batches whereas the maximum size of the global pool is controlled by the 38 * qemu_coroutine_inc_pool_size() API. 39 * 40 * .-----------------------------------. 41 * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool 42 * `-----------------------------------' 43 * 44 * .-------------------. 45 * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) 46 * `-------------------' 47 */ 48 typedef struct CoroutinePoolBatch { 49 /* Batches are kept in a list */ 50 QSLIST_ENTRY(CoroutinePoolBatch) next; 51 52 /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */ 53 QSLIST_HEAD(, Coroutine) list; 54 unsigned int size; 55 } CoroutinePoolBatch; 56 57 typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool; 58 59 /* Host operating system limit on number of pooled coroutines */ 60 static unsigned int global_pool_hard_max_size; 61 62 static QemuMutex global_pool_lock; /* protects the following variables */ 63 static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool); 64 static unsigned int global_pool_size; 65 static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE; 66 67 QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool); 68 QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier); 69 70 static CoroutinePoolBatch *coroutine_pool_batch_new(void) 71 { 72 CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1); 73 74 QSLIST_INIT(&batch->list); 75 batch->size = 0; 76 return batch; 77 } 78 79 static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch) 80 { 81 Coroutine *co; 82 Coroutine *tmp; 83 84 QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) { 85 QSLIST_REMOVE_HEAD(&batch->list, pool_next); 86 qemu_coroutine_delete(co); 87 } 88 g_free(batch); 89 } 90 91 static void local_pool_cleanup(Notifier *n, void *value) 92 { 93 CoroutinePool *local_pool = get_ptr_local_pool(); 94 CoroutinePoolBatch *batch; 95 CoroutinePoolBatch *tmp; 96 97 QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) { 98 QSLIST_REMOVE_HEAD(local_pool, next); 99 coroutine_pool_batch_delete(batch); 100 } 101 } 102 103 /* Ensure the atexit notifier is registered */ 104 static void local_pool_cleanup_init_once(void) 105 { 106 Notifier *notifier = get_ptr_local_pool_cleanup_notifier(); 107 if (!notifier->notify) { 108 notifier->notify = local_pool_cleanup; 109 qemu_thread_atexit_add(notifier); 110 } 111 } 112 113 /* Helper to get the next unused coroutine from the local pool */ 114 static Coroutine *coroutine_pool_get_local(void) 115 { 116 CoroutinePool *local_pool = get_ptr_local_pool(); 117 CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); 118 Coroutine *co; 119 120 if (unlikely(!batch)) { 121 return NULL; 122 } 123 124 co = QSLIST_FIRST(&batch->list); 125 QSLIST_REMOVE_HEAD(&batch->list, pool_next); 126 batch->size--; 127 128 if (batch->size == 0) { 129 QSLIST_REMOVE_HEAD(local_pool, next); 130 coroutine_pool_batch_delete(batch); 131 } 132 return co; 133 } 134 135 /* Get the next batch from the global pool */ 136 static void coroutine_pool_refill_local(void) 137 { 138 CoroutinePool *local_pool = get_ptr_local_pool(); 139 CoroutinePoolBatch *batch = NULL; 140 141 WITH_QEMU_LOCK_GUARD(&global_pool_lock) { 142 batch = QSLIST_FIRST(&global_pool); 143 144 if (batch) { 145 QSLIST_REMOVE_HEAD(&global_pool, next); 146 global_pool_size -= batch->size; 147 } 148 } 149 150 if (batch) { 151 QSLIST_INSERT_HEAD(local_pool, batch, next); 152 local_pool_cleanup_init_once(); 153 } 154 } 155 156 /* Add a batch of coroutines to the global pool */ 157 static void coroutine_pool_put_global(CoroutinePoolBatch *batch) 158 { 159 WITH_QEMU_LOCK_GUARD(&global_pool_lock) { 160 unsigned int max = MIN(global_pool_max_size, 161 global_pool_hard_max_size); 162 163 if (global_pool_size < max) { 164 QSLIST_INSERT_HEAD(&global_pool, batch, next); 165 166 /* Overshooting the max pool size is allowed */ 167 global_pool_size += batch->size; 168 return; 169 } 170 } 171 172 /* The global pool was full, so throw away this batch */ 173 coroutine_pool_batch_delete(batch); 174 } 175 176 /* Get the next unused coroutine from the pool or return NULL */ 177 static Coroutine *coroutine_pool_get(void) 178 { 179 Coroutine *co; 180 181 co = coroutine_pool_get_local(); 182 if (!co) { 183 coroutine_pool_refill_local(); 184 co = coroutine_pool_get_local(); 185 } 186 return co; 187 } 188 189 static void coroutine_pool_put(Coroutine *co) 190 { 191 CoroutinePool *local_pool = get_ptr_local_pool(); 192 CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); 193 194 if (unlikely(!batch)) { 195 batch = coroutine_pool_batch_new(); 196 QSLIST_INSERT_HEAD(local_pool, batch, next); 197 local_pool_cleanup_init_once(); 198 } 199 200 if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) { 201 CoroutinePoolBatch *next = QSLIST_NEXT(batch, next); 202 203 /* Is the local pool full? */ 204 if (next) { 205 QSLIST_REMOVE_HEAD(local_pool, next); 206 coroutine_pool_put_global(batch); 207 } 208 209 batch = coroutine_pool_batch_new(); 210 QSLIST_INSERT_HEAD(local_pool, batch, next); 211 } 212 213 QSLIST_INSERT_HEAD(&batch->list, co, pool_next); 214 batch->size++; 215 } 216 217 Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) 218 { 219 Coroutine *co = NULL; 220 221 if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { 222 co = coroutine_pool_get(); 223 } 224 225 if (!co) { 226 co = qemu_coroutine_new(); 227 } 228 229 co->entry = entry; 230 co->entry_arg = opaque; 231 QSIMPLEQ_INIT(&co->co_queue_wakeup); 232 return co; 233 } 234 235 static void coroutine_delete(Coroutine *co) 236 { 237 co->caller = NULL; 238 239 if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { 240 coroutine_pool_put(co); 241 } else { 242 qemu_coroutine_delete(co); 243 } 244 } 245 246 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) 247 { 248 QSIMPLEQ_HEAD(, Coroutine) pending = QSIMPLEQ_HEAD_INITIALIZER(pending); 249 Coroutine *from = qemu_coroutine_self(); 250 251 QSIMPLEQ_INSERT_TAIL(&pending, co, co_queue_next); 252 253 /* Run co and any queued coroutines */ 254 while (!QSIMPLEQ_EMPTY(&pending)) { 255 Coroutine *to = QSIMPLEQ_FIRST(&pending); 256 CoroutineAction ret; 257 258 /* 259 * Read to before to->scheduled; pairs with qatomic_cmpxchg in 260 * qemu_co_sleep(), aio_co_schedule() etc. 261 */ 262 smp_read_barrier_depends(); 263 264 const char *scheduled = qatomic_read(&to->scheduled); 265 266 QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next); 267 268 trace_qemu_aio_coroutine_enter(ctx, from, to, to->entry_arg); 269 270 /* if the Coroutine has already been scheduled, entering it again will 271 * cause us to enter it twice, potentially even after the coroutine has 272 * been deleted */ 273 if (scheduled) { 274 fprintf(stderr, 275 "%s: Co-routine was already scheduled in '%s'\n", 276 __func__, scheduled); 277 abort(); 278 } 279 280 if (to->caller) { 281 fprintf(stderr, "Co-routine re-entered recursively\n"); 282 abort(); 283 } 284 285 to->caller = from; 286 to->ctx = ctx; 287 288 /* Store to->ctx before anything that stores to. Matches 289 * barrier in aio_co_wake and qemu_co_mutex_wake. 290 */ 291 smp_wmb(); 292 293 ret = qemu_coroutine_switch(from, to, COROUTINE_ENTER); 294 295 /* Queued coroutines are run depth-first; previously pending coroutines 296 * run after those queued more recently. 297 */ 298 QSIMPLEQ_PREPEND(&pending, &to->co_queue_wakeup); 299 300 switch (ret) { 301 case COROUTINE_YIELD: 302 break; 303 case COROUTINE_TERMINATE: 304 assert(!to->locks_held); 305 trace_qemu_coroutine_terminate(to); 306 coroutine_delete(to); 307 break; 308 default: 309 abort(); 310 } 311 } 312 } 313 314 void qemu_coroutine_enter(Coroutine *co) 315 { 316 qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co); 317 } 318 319 void qemu_coroutine_enter_if_inactive(Coroutine *co) 320 { 321 if (!qemu_coroutine_entered(co)) { 322 qemu_coroutine_enter(co); 323 } 324 } 325 326 void coroutine_fn qemu_coroutine_yield(void) 327 { 328 Coroutine *self = qemu_coroutine_self(); 329 Coroutine *to = self->caller; 330 331 trace_qemu_coroutine_yield(self, to); 332 333 if (!to) { 334 fprintf(stderr, "Co-routine is yielding to no one\n"); 335 abort(); 336 } 337 338 self->caller = NULL; 339 qemu_coroutine_switch(self, to, COROUTINE_YIELD); 340 } 341 342 bool qemu_coroutine_entered(Coroutine *co) 343 { 344 return co->caller; 345 } 346 347 AioContext *qemu_coroutine_get_aio_context(Coroutine *co) 348 { 349 return co->ctx; 350 } 351 352 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) 353 { 354 QEMU_LOCK_GUARD(&global_pool_lock); 355 global_pool_max_size += additional_pool_size; 356 } 357 358 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) 359 { 360 QEMU_LOCK_GUARD(&global_pool_lock); 361 global_pool_max_size -= removing_pool_size; 362 } 363 364 static unsigned int get_global_pool_hard_max_size(void) 365 { 366 #ifdef __linux__ 367 g_autofree char *contents = NULL; 368 int max_map_count; 369 370 /* 371 * Linux processes can have up to max_map_count virtual memory areas 372 * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We 373 * must limit the coroutine pool to a safe size to avoid running out of 374 * VMAs. 375 */ 376 if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL, 377 NULL) && 378 qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { 379 /* 380 * This is an upper bound that avoids exceeding max_map_count. Leave a 381 * fixed amount for non-coroutine users like library dependencies, 382 * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the 383 * remaining amount. 384 */ 385 if (max_map_count > 5000) { 386 return (max_map_count - 5000) / 2; 387 } else { 388 /* Disable the global pool but threads still have local pools */ 389 return 0; 390 } 391 } 392 #endif 393 394 return UINT_MAX; 395 } 396 397 static void __attribute__((constructor)) qemu_coroutine_init(void) 398 { 399 qemu_mutex_init(&global_pool_lock); 400 global_pool_hard_max_size = get_global_pool_hard_max_size(); 401 } 402