/*
 * Data plane event loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "trace.h"

/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */

struct QEMUBH {
    AioContext *ctx;
    QEMUBHFunc *cb;
    void *opaque;
    QEMUBH *next;
    bool scheduled;
    bool idle;
    bool deleted;
};

void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}
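/*
 * Usage sketch (illustrative; "my_cb" and "my_state" are hypothetical
 * names, not part of this file): a one-shot BH runs once in the context's
 * home thread and is freed automatically afterwards, which is why the
 * constructor above sets both ->scheduled and ->deleted up front:
 *
 *   static void my_cb(void *opaque)
 *   {
 *       MyState *s = opaque;   // runs in ctx's event loop thread
 *       ...
 *   }
 *
 *   aio_bh_schedule_oneshot(ctx, my_cb, my_state);
 */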
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    return bh;
}

void aio_bh_call(QEMUBH *bh)
{
    bh->cb(bh->opaque);
}
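/*
 * Usage sketch (illustrative; "my_cb" and "my_state" are hypothetical):
 * the long-lived counterpart to the one-shot API above.  The caller owns
 * the QEMUBH and may re-schedule it any number of times before deleting it:
 *
 *   QEMUBH *bh = aio_bh_new(ctx, my_cb, my_state);
 *   qemu_bh_schedule(bh);    // my_cb runs once per schedule/poll cycle
 *   ...
 *   qemu_bh_delete(bh);      // freed later by aio_bh_poll
 */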
/* Multiple invocations of aio_bh_poll cannot run concurrently.
 * The count in ctx->list_lock is incremented before the call, and is
 * not affected by the call.
 */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        if (bh->deleted) {
            deleted = true;
        }
    }

    /* remove deleted bhs */
    if (!deleted) {
        return ret;
    }

    if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted && !bh->scheduled) {
                *bhp = bh->next;
                g_free(bh);
            } else {
                bhp = &bh->next;
            }
        }
        qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
    }
    return ret;
}

void qemu_bh_schedule_idle(QEMUBH *bh)
{
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
    atomic_mb_set(&bh->scheduled, 1);
}

void qemu_bh_schedule(QEMUBH *bh)
{
    AioContext *ctx;

    ctx = bh->ctx;
    bh->idle = 0;
    /* The memory barrier implicit in atomic_xchg makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
    if (atomic_xchg(&bh->scheduled, 1) == 0) {
        aio_notify(ctx);
    }
}


/* This function is asynchronous: a callback that has already started is not
 * interrupted, but the bottom half will not be scheduled again.
 */
void qemu_bh_cancel(QEMUBH *bh)
{
    atomic_mb_set(&bh->scheduled, 0);
}

/* This function is asynchronous: the bottom half is only unlinked and freed
 * later, by aio_bh_poll, once it is no longer scheduled.
 */
void qemu_bh_delete(QEMUBH *bh)
{
    bh->scheduled = 0;
    bh->deleted = 1;
}
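/*
 * Sketch of the cancel/delete distinction (illustrative):
 *
 *   qemu_bh_schedule(bh);
 *   qemu_bh_cancel(bh);     // drops the pending run (unless it already
 *                           // started); bh stays allocated and usable
 *   qemu_bh_schedule(bh);   // may be scheduled again
 *   qemu_bh_delete(bh);     // never runs again; aio_bh_poll frees it
 */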
int64_t
aio_compute_timeout(AioContext *ctx)
{
    int64_t deadline;
    int timeout = -1;
    QEMUBH *bh;

    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
         bh = atomic_rcu_read(&bh->next)) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
                timeout = 10000000;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
                return 0;
            }
        }
    }

    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
    if (deadline == 0) {
        return 0;
    } else {
        return qemu_soonest_timeout(timeout, deadline);
    }
}

static gboolean
aio_ctx_prepare(GSource *source, gint *timeout)
{
    AioContext *ctx = (AioContext *) source;

    atomic_or(&ctx->notify_me, 1);

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

    if (aio_prepare(ctx)) {
        *timeout = 0;
    }

    return *timeout == 0;
}

static gboolean
aio_ctx_check(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

    atomic_and(&ctx->notify_me, ~1);
    aio_notify_accept(ctx);

    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            return true;
        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}

static gboolean
aio_ctx_dispatch(GSource *source,
                 GSourceFunc callback,
                 gpointer user_data)
{
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
    aio_dispatch(ctx);
    return true;
}
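/*
 * Usage sketch (illustrative; assumes a caller that owns a GLib loop):
 * the three callbacks above follow GLib's custom GSource contract
 * (prepare -> poll -> check -> dispatch).  They only run once the
 * context's source is attached to a GMainContext, e.g.:
 *
 *   GSource *source = aio_get_g_source(ctx);    // defined below
 *   g_source_attach(source, g_main_context_default());
 *   g_source_unref(source);  // the GMainContext keeps its own reference
 */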
static void
aio_ctx_finalize(GSource *source)
{
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
    qemu_bh_delete(ctx->co_schedule_bh);

    qemu_lockcnt_lock(&ctx->list_lock);
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(ctx->first_bh->deleted);

        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);

    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
    aio_context_destroy(ctx);
}

static GSourceFuncs aio_source_funcs = {
    aio_ctx_prepare,
    aio_ctx_check,
    aio_ctx_dispatch,
    aio_ctx_finalize
};

GSource *aio_get_g_source(AioContext *ctx)
{
    g_source_ref(&ctx->source);
    return &ctx->source;
}

ThreadPool *aio_get_thread_pool(AioContext *ctx)
{
    if (!ctx->thread_pool) {
        ctx->thread_pool = thread_pool_new(ctx);
    }
    return ctx->thread_pool;
}

#ifdef CONFIG_LINUX_AIO
LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
{
    if (!ctx->linux_aio) {
        ctx->linux_aio = laio_init(errp);
        if (ctx->linux_aio) {
            laio_attach_aio_context(ctx->linux_aio, ctx);
        }
    }
    return ctx->linux_aio;
}

LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
    assert(ctx->linux_aio);
    return ctx->linux_aio;
}
#endif
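/*
 * Usage sketch (illustrative; "blocking_fn", "done_cb" and their
 * arguments are hypothetical): the lazily-created thread pool offloads
 * blocking work to a worker thread and completes back in this AioContext:
 *
 *   ThreadPool *pool = aio_get_thread_pool(ctx);
 *   thread_pool_submit_aio(pool, blocking_fn, arg, done_cb, my_state);
 */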
void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}

void aio_notify_accept(AioContext *ctx)
{
    if (atomic_xchg(&ctx->notified, false)) {
        event_notifier_test_and_clear(&ctx->notifier);
    }
}

static void aio_timerlist_notify(void *opaque, QEMUClockType type)
{
    aio_notify(opaque);
}

static void event_notifier_dummy_cb(EventNotifier *e)
{
}

/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
static bool event_notifier_poll(void *opaque)
{
    EventNotifier *e = opaque;
    AioContext *ctx = container_of(e, AioContext, notifier);

    return atomic_read(&ctx->notified);
}
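/*
 * Sketch of the wakeup handshake implemented above (illustrative):
 *
 *   scheduling thread                 event loop thread
 *   -----------------                 -----------------
 *   bh->scheduled = 1;                atomic_or(&ctx->notify_me, 1);
 *   smp_mb();                         poll for events
 *   if (ctx->notify_me) {             atomic_and(&ctx->notify_me, ~1);
 *       kick eventfd, set notified;   aio_notify_accept(ctx);
 *   }
 *
 * Either the poller sees bh->scheduled, or the scheduler sees notify_me
 * and kicks the event notifier; event_notifier_poll() lets the busy-wait
 * polling mode observe ->notified without reading the eventfd.
 */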
static void co_schedule_bh_cb(void *opaque)
{
    AioContext *ctx = opaque;
    QSLIST_HEAD(, Coroutine) straight, reversed;

    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
    QSLIST_INIT(&straight);

    while (!QSLIST_EMPTY(&reversed)) {
        Coroutine *co = QSLIST_FIRST(&reversed);
        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
    }

    while (!QSLIST_EMPTY(&straight)) {
        Coroutine *co = QSLIST_FIRST(&straight);
        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
        trace_aio_co_schedule_bh_cb(ctx, co);
        aio_context_acquire(ctx);

        /* Protected by write barrier in qemu_aio_coroutine_enter */
        atomic_set(&co->scheduled, NULL);
        qemu_aio_coroutine_enter(ctx, co);
        aio_context_release(ctx);
    }
}

AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);

    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
    QSLIST_INIT(&ctx->scheduled_coroutines);

    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
#endif
    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    g_source_destroy(&ctx->source);
    return NULL;
}

void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
    trace_aio_co_schedule(ctx, co);
    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
                                           __func__);

    if (scheduled) {
        fprintf(stderr,
                "%s: Co-routine was already scheduled in '%s'\n",
                __func__, scheduled);
        abort();
    }

    /* The coroutine might run and release the last ctx reference before we
     * invoke qemu_bh_schedule().  Take a reference to keep ctx alive until
     * we're done.
     */
    aio_context_ref(ctx);

    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                              co, co_scheduled_next);
    qemu_bh_schedule(ctx->co_schedule_bh);

    aio_context_unref(ctx);
}
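/*
 * Usage sketch (illustrative; "my_co_fn" and "my_state" are hypothetical):
 * hand a coroutine to another context's thread:
 *
 *   Coroutine *co = qemu_coroutine_create(my_co_fn, my_state);
 *   aio_co_schedule(target_ctx, co);  // resumes in target_ctx's thread
 *
 * Scheduling the same coroutine again before it has run aborts, as
 * enforced by the atomic_cmpxchg above.
 */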
void aio_co_wake(struct Coroutine *co)
{
    AioContext *ctx;

    /* Read coroutine before co->ctx.  Matches smp_wmb in
     * qemu_coroutine_enter.
     */
    smp_read_barrier_depends();
    ctx = atomic_read(&co->ctx);

    aio_co_enter(ctx, co);
}

void aio_co_enter(AioContext *ctx, struct Coroutine *co)
{
    if (ctx != qemu_get_current_aio_context()) {
        aio_co_schedule(ctx, co);
        return;
    }

    if (qemu_in_coroutine()) {
        Coroutine *self = qemu_coroutine_self();
        assert(self != co);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
    } else {
        aio_context_acquire(ctx);
        qemu_aio_coroutine_enter(ctx, co);
        aio_context_release(ctx);
    }
}

void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}

void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}

void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}

void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}
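/*
 * Usage sketch (illustrative): cross-thread access to state owned by a
 * context brackets it with the context lock; the lock is recursive, so
 * callbacks entered while it is held may safely re-acquire it:
 *
 *   aio_context_acquire(ctx);
 *   ...                        // touch ctx-owned state, enter coroutines
 *   aio_context_release(ctx);
 */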