/*
 * Data plane event loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "trace.h"

/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */

struct QEMUBH {
    AioContext *ctx;
    QEMUBHFunc *cb;
    void *opaque;
    QEMUBH *next;
    bool scheduled;
    bool idle;
    bool deleted;
};

void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}
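/*
 * Usage sketch (illustrative; the request type and callback names are
 * hypothetical): defer a completion into this context's thread without
 * having to keep track of the bottom half afterwards.
 *
 *     static void complete_request_bh(void *opaque)
 *     {
 *         MyRequest *req = opaque;                // hypothetical type
 *         req->complete_cb(req->cb_opaque, 0);
 *     }
 *
 *     aio_bh_schedule_oneshot(ctx, complete_request_bh, req);
 *
 * The bottom half is created with both "scheduled" and "deleted" already set,
 * so aio_bh_poll() runs it once and then frees it in its deleted-BH sweep.
 */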
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    return bh;
}

void aio_bh_call(QEMUBH *bh)
{
    bh->cb(bh->opaque);
}
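/*
 * Usage sketch (illustrative; names are hypothetical): the long-lived
 * counterpart to aio_bh_schedule_oneshot().  A bottom half created with
 * aio_bh_new() can be scheduled any number of times and must eventually be
 * released with qemu_bh_delete():
 *
 *     dev->bh = aio_bh_new(ctx, my_device_bh_cb, dev);   // setup
 *     ...
 *     qemu_bh_schedule(dev->bh);   // run my_device_bh_cb(dev) in ctx soon
 *     ...
 *     qemu_bh_delete(dev->bh);     // teardown; freed later by aio_bh_poll()
 */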
/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
 * The count in ctx->list_lock is incremented before the call, and is
 * not affected by the call.
 */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        if (bh->deleted) {
            deleted = true;
        }
    }

    /* remove deleted bhs */
    if (!deleted) {
        return ret;
    }

    if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted && !bh->scheduled) {
                *bhp = bh->next;
                g_free(bh);
            } else {
                bhp = &bh->next;
            }
        }
        qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
    }
    return ret;
}

void qemu_bh_schedule_idle(QEMUBH *bh)
{
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
    atomic_mb_set(&bh->scheduled, 1);
}

void qemu_bh_schedule(QEMUBH *bh)
{
    AioContext *ctx;

    ctx = bh->ctx;
    bh->idle = 0;
    /* The memory barrier implicit in atomic_xchg makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
    if (atomic_xchg(&bh->scheduled, 1) == 0) {
        aio_notify(ctx);
    }
}

/* This function is asynchronous: it only clears the scheduled flag and does
 * not wait for a callback that is already running.
 */
void qemu_bh_cancel(QEMUBH *bh)
{
    atomic_mb_set(&bh->scheduled, 0);
}
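/*
 * Illustrative cross-thread pattern (hypothetical worker thread): because
 * qemu_bh_schedule() uses atomic_xchg() plus aio_notify(), a thread that
 * does not own the AioContext can hand work to the event loop with nothing
 * more than:
 *
 *     // worker thread, after filling in the data that my_bh_cb() will read
 *     qemu_bh_schedule(bh);
 *
 * The paired atomic_xchg() in aio_bh_poll() guarantees the callback sees the
 * worker's writes, and aio_notify() wakes the loop if it is blocked in poll.
 */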
/* This function is asynchronous: the bottom half is only marked for deletion
 * here and is actually freed later, by aio_bh_poll().
 */
void qemu_bh_delete(QEMUBH *bh)
{
    bh->scheduled = 0;
    bh->deleted = 1;
}

int64_t
aio_compute_timeout(AioContext *ctx)
{
    int64_t deadline;
    int timeout = -1;
    QEMUBH *bh;

    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
         bh = atomic_rcu_read(&bh->next)) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
                timeout = 10000000;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
                return 0;
            }
        }
    }

    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
    if (deadline == 0) {
        return 0;
    } else {
        return qemu_soonest_timeout(timeout, deadline);
    }
}

static gboolean
aio_ctx_prepare(GSource *source, gint *timeout)
{
    AioContext *ctx = (AioContext *) source;

    atomic_or(&ctx->notify_me, 1);

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

    if (aio_prepare(ctx)) {
        *timeout = 0;
    }

    return *timeout == 0;
}

static gboolean
aio_ctx_check(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

    atomic_and(&ctx->notify_me, ~1);
    aio_notify_accept(ctx);

    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            return true;
        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}

static gboolean
aio_ctx_dispatch(GSource *source,
                 GSourceFunc callback,
                 gpointer user_data)
{
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
    aio_dispatch(ctx);
    return true;
}
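/*
 * The prepare/check/dispatch callbacks above are what let an AioContext run
 * inside a GLib main loop.  Illustrative sketch (hypothetical caller) of how
 * a context is attached:
 *
 *     GSource *source = aio_get_g_source(ctx);    // takes a reference
 *     g_source_attach(source, g_main_context_default());
 *     g_source_unref(source);
 *
 * After that, g_main_loop_run() drives prepare/check/dispatch and hence the
 * context's bottom halves, file descriptor handlers and timers.
 */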
static void
aio_ctx_finalize(GSource *source)
{
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
    qemu_bh_delete(ctx->co_schedule_bh);

    qemu_lockcnt_lock(&ctx->list_lock);
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(ctx->first_bh->deleted);

        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);

    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
    aio_context_destroy(ctx);
}

static GSourceFuncs aio_source_funcs = {
    aio_ctx_prepare,
    aio_ctx_check,
    aio_ctx_dispatch,
    aio_ctx_finalize
};

GSource *aio_get_g_source(AioContext *ctx)
{
    g_source_ref(&ctx->source);
    return &ctx->source;
}

ThreadPool *aio_get_thread_pool(AioContext *ctx)
{
    if (!ctx->thread_pool) {
        ctx->thread_pool = thread_pool_new(ctx);
    }
    return ctx->thread_pool;
}

#ifdef CONFIG_LINUX_AIO
LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
{
    if (!ctx->linux_aio) {
        ctx->linux_aio = laio_init(errp);
        if (ctx->linux_aio) {
            laio_attach_aio_context(ctx->linux_aio, ctx);
        }
    }
    return ctx->linux_aio;
}
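/*
 * Illustrative sketch (hypothetical caller): aio_setup_linux_aio() is the
 * fallible entry point, aio_get_linux_aio() below is the infallible one for
 * code that knows setup already succeeded:
 *
 *     Error *local_err = NULL;
 *     LinuxAioState *aio = aio_setup_linux_aio(ctx, &local_err);
 *     if (!aio) {
 *         error_report_err(local_err);   // e.g. fall back to the thread pool
 *     }
 */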
LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
    assert(ctx->linux_aio);
    return ctx->linux_aio;
}
#endif

void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}

void aio_notify_accept(AioContext *ctx)
{
    if (atomic_xchg(&ctx->notified, false)) {
        event_notifier_test_and_clear(&ctx->notifier);
    }
}

static void aio_timerlist_notify(void *opaque, QEMUClockType type)
{
    aio_notify(opaque);
}

static void event_notifier_dummy_cb(EventNotifier *e)
{
}

/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
static bool event_notifier_poll(void *opaque)
{
    EventNotifier *e = opaque;
    AioContext *ctx = container_of(e, AioContext, notifier);

    return atomic_read(&ctx->notified);
}

static void co_schedule_bh_cb(void *opaque)
{
    AioContext *ctx = opaque;
    QSLIST_HEAD(, Coroutine) straight, reversed;

    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
    QSLIST_INIT(&straight);

    while (!QSLIST_EMPTY(&reversed)) {
        Coroutine *co = QSLIST_FIRST(&reversed);
        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
    }

    while (!QSLIST_EMPTY(&straight)) {
        Coroutine *co = QSLIST_FIRST(&straight);
        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
        trace_aio_co_schedule_bh_cb(ctx, co);
        aio_context_acquire(ctx);

        /* Protected by write barrier in qemu_aio_coroutine_enter */
        atomic_set(&co->scheduled, NULL);
        qemu_coroutine_enter(co);
        aio_context_release(ctx);
    }
}
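/*
 * Illustrative sketch (hypothetical caller, e.g. an IOThread-style user):
 * contexts are created with aio_context_new() and reference counted through
 * their embedded GSource:
 *
 *     Error *local_err = NULL;
 *     AioContext *ctx = aio_context_new(&local_err);
 *     if (!ctx) {
 *         error_report_err(local_err);
 *         return;
 *     }
 *     ...
 *     aio_context_unref(ctx);    // drops the reference from g_source_new()
 */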
AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);

    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
    QSLIST_INIT(&ctx->scheduled_coroutines);

    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
#endif
    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    g_source_destroy(&ctx->source);
    return NULL;
}

void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
    trace_aio_co_schedule(ctx, co);
    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
                                           __func__);

    if (scheduled) {
        fprintf(stderr,
                "%s: Co-routine was already scheduled in '%s'\n",
                __func__, scheduled);
        abort();
    }

    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                              co, co_scheduled_next);
    qemu_bh_schedule(ctx->co_schedule_bh);
}
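/*
 * Illustrative sketch (hypothetical coroutine code): a coroutine that must
 * wait for another thread typically parks itself and is later rescheduled
 * into its home context:
 *
 *     // in the coroutine
 *     co = qemu_coroutine_self();
 *     qemu_coroutine_yield();
 *
 *     // in another thread, once the result is ready and the coroutine
 *     // is known to have yielded
 *     aio_co_schedule(ctx, co);      // or aio_co_wake(co)
 *
 * aio_co_schedule() only pushes the coroutine onto scheduled_coroutines and
 * schedules co_schedule_bh_cb(); the coroutine itself always re-enters in
 * the target context's thread.
 */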
void aio_co_wake(struct Coroutine *co)
{
    AioContext *ctx;

    /* Read coroutine before co->ctx.  Matches smp_wmb in
     * qemu_coroutine_enter.
     */
    smp_read_barrier_depends();
    ctx = atomic_read(&co->ctx);

    aio_co_enter(ctx, co);
}

void aio_co_enter(AioContext *ctx, struct Coroutine *co)
{
    if (ctx != qemu_get_current_aio_context()) {
        aio_co_schedule(ctx, co);
        return;
    }

    if (qemu_in_coroutine()) {
        Coroutine *self = qemu_coroutine_self();
        assert(self != co);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
    } else {
        aio_context_acquire(ctx);
        qemu_aio_coroutine_enter(ctx, co);
        aio_context_release(ctx);
    }
}

void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}

void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}

void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}

void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}
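/*
 * Illustrative sketch (hypothetical caller): code running outside the
 * context's native thread takes the recursive lock around operations on the
 * context and the objects bound to it:
 *
 *     aio_context_acquire(ctx);
 *     ... call into code that runs in ctx ...
 *     aio_context_release(ctx);
 *
 * The lock is recursive (qemu_rec_mutex_*), so callbacks dispatched while it
 * is held may acquire it again.
 */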