1c2b38b27SPaolo Bonzini /* 2c2b38b27SPaolo Bonzini * Data plane event loop 3c2b38b27SPaolo Bonzini * 4c2b38b27SPaolo Bonzini * Copyright (c) 2003-2008 Fabrice Bellard 5c2b38b27SPaolo Bonzini * Copyright (c) 2009-2017 QEMU contributors 6c2b38b27SPaolo Bonzini * 7c2b38b27SPaolo Bonzini * Permission is hereby granted, free of charge, to any person obtaining a copy 8c2b38b27SPaolo Bonzini * of this software and associated documentation files (the "Software"), to deal 9c2b38b27SPaolo Bonzini * in the Software without restriction, including without limitation the rights 10c2b38b27SPaolo Bonzini * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11c2b38b27SPaolo Bonzini * copies of the Software, and to permit persons to whom the Software is 12c2b38b27SPaolo Bonzini * furnished to do so, subject to the following conditions: 13c2b38b27SPaolo Bonzini * 14c2b38b27SPaolo Bonzini * The above copyright notice and this permission notice shall be included in 15c2b38b27SPaolo Bonzini * all copies or substantial portions of the Software. 16c2b38b27SPaolo Bonzini * 17c2b38b27SPaolo Bonzini * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18c2b38b27SPaolo Bonzini * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19c2b38b27SPaolo Bonzini * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20c2b38b27SPaolo Bonzini * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21c2b38b27SPaolo Bonzini * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22c2b38b27SPaolo Bonzini * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23c2b38b27SPaolo Bonzini * THE SOFTWARE. 24c2b38b27SPaolo Bonzini */ 25c2b38b27SPaolo Bonzini 26c2b38b27SPaolo Bonzini #include "qemu/osdep.h" 27c2b38b27SPaolo Bonzini #include "qapi/error.h" 28c2b38b27SPaolo Bonzini #include "block/aio.h" 29c2b38b27SPaolo Bonzini #include "block/thread-pool.h" 30c2b38b27SPaolo Bonzini #include "qemu/main-loop.h" 31c2b38b27SPaolo Bonzini #include "qemu/atomic.h" 32c2b38b27SPaolo Bonzini #include "block/raw-aio.h" 330c330a73SPaolo Bonzini #include "qemu/coroutine_int.h" 340c330a73SPaolo Bonzini #include "trace.h" 35c2b38b27SPaolo Bonzini 36c2b38b27SPaolo Bonzini /***********************************************************/ 37c2b38b27SPaolo Bonzini /* bottom halves (can be seen as timers which expire ASAP) */ 38c2b38b27SPaolo Bonzini 39c2b38b27SPaolo Bonzini struct QEMUBH { 40c2b38b27SPaolo Bonzini AioContext *ctx; 41c2b38b27SPaolo Bonzini QEMUBHFunc *cb; 42c2b38b27SPaolo Bonzini void *opaque; 43c2b38b27SPaolo Bonzini QEMUBH *next; 44c2b38b27SPaolo Bonzini bool scheduled; 45c2b38b27SPaolo Bonzini bool idle; 46c2b38b27SPaolo Bonzini bool deleted; 47c2b38b27SPaolo Bonzini }; 48c2b38b27SPaolo Bonzini 49c2b38b27SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) 50c2b38b27SPaolo Bonzini { 51c2b38b27SPaolo Bonzini QEMUBH *bh; 52c2b38b27SPaolo Bonzini bh = g_new(QEMUBH, 1); 53c2b38b27SPaolo Bonzini *bh = (QEMUBH){ 54c2b38b27SPaolo Bonzini .ctx = ctx, 55c2b38b27SPaolo Bonzini .cb = cb, 56c2b38b27SPaolo Bonzini .opaque = opaque, 57c2b38b27SPaolo Bonzini }; 58c2b38b27SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 59c2b38b27SPaolo Bonzini bh->next = ctx->first_bh; 60c2b38b27SPaolo Bonzini bh->scheduled = 1; 61c2b38b27SPaolo Bonzini bh->deleted = 1; 62c2b38b27SPaolo Bonzini /* Make sure that the members are ready before putting bh into list */ 63c2b38b27SPaolo Bonzini smp_wmb(); 64c2b38b27SPaolo Bonzini ctx->first_bh = bh; 65c2b38b27SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 66c2b38b27SPaolo Bonzini aio_notify(ctx); 67c2b38b27SPaolo Bonzini } 68c2b38b27SPaolo Bonzini 69c2b38b27SPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) 70c2b38b27SPaolo Bonzini { 71c2b38b27SPaolo Bonzini QEMUBH *bh; 72c2b38b27SPaolo Bonzini bh = g_new(QEMUBH, 1); 73c2b38b27SPaolo Bonzini *bh = (QEMUBH){ 74c2b38b27SPaolo Bonzini .ctx = ctx, 75c2b38b27SPaolo Bonzini .cb = cb, 76c2b38b27SPaolo Bonzini .opaque = opaque, 77c2b38b27SPaolo Bonzini }; 78c2b38b27SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 79c2b38b27SPaolo Bonzini bh->next = ctx->first_bh; 80c2b38b27SPaolo Bonzini /* Make sure that the members are ready before putting bh into list */ 81c2b38b27SPaolo Bonzini smp_wmb(); 82c2b38b27SPaolo Bonzini ctx->first_bh = bh; 83c2b38b27SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 84c2b38b27SPaolo Bonzini return bh; 85c2b38b27SPaolo Bonzini } 86c2b38b27SPaolo Bonzini 87c2b38b27SPaolo Bonzini void aio_bh_call(QEMUBH *bh) 88c2b38b27SPaolo Bonzini { 89c2b38b27SPaolo Bonzini bh->cb(bh->opaque); 90c2b38b27SPaolo Bonzini } 91c2b38b27SPaolo Bonzini 92bd451435SPaolo Bonzini /* Multiple occurrences of aio_bh_poll cannot be called concurrently. 93bd451435SPaolo Bonzini * The count in ctx->list_lock is incremented before the call, and is 94bd451435SPaolo Bonzini * not affected by the call. 95bd451435SPaolo Bonzini */ 96c2b38b27SPaolo Bonzini int aio_bh_poll(AioContext *ctx) 97c2b38b27SPaolo Bonzini { 98c2b38b27SPaolo Bonzini QEMUBH *bh, **bhp, *next; 99c2b38b27SPaolo Bonzini int ret; 100c2b38b27SPaolo Bonzini bool deleted = false; 101c2b38b27SPaolo Bonzini 102c2b38b27SPaolo Bonzini ret = 0; 103c2b38b27SPaolo Bonzini for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) { 104c2b38b27SPaolo Bonzini next = atomic_rcu_read(&bh->next); 105c2b38b27SPaolo Bonzini /* The atomic_xchg is paired with the one in qemu_bh_schedule. The 106c2b38b27SPaolo Bonzini * implicit memory barrier ensures that the callback sees all writes 107c2b38b27SPaolo Bonzini * done by the scheduling thread. It also ensures that the scheduling 108c2b38b27SPaolo Bonzini * thread sees the zero before bh->cb has run, and thus will call 109c2b38b27SPaolo Bonzini * aio_notify again if necessary. 110c2b38b27SPaolo Bonzini */ 111c2b38b27SPaolo Bonzini if (atomic_xchg(&bh->scheduled, 0)) { 112c2b38b27SPaolo Bonzini /* Idle BHs don't count as progress */ 113c2b38b27SPaolo Bonzini if (!bh->idle) { 114c2b38b27SPaolo Bonzini ret = 1; 115c2b38b27SPaolo Bonzini } 116c2b38b27SPaolo Bonzini bh->idle = 0; 117c2b38b27SPaolo Bonzini aio_bh_call(bh); 118c2b38b27SPaolo Bonzini } 119c2b38b27SPaolo Bonzini if (bh->deleted) { 120c2b38b27SPaolo Bonzini deleted = true; 121c2b38b27SPaolo Bonzini } 122c2b38b27SPaolo Bonzini } 123c2b38b27SPaolo Bonzini 124c2b38b27SPaolo Bonzini /* remove deleted bhs */ 125c2b38b27SPaolo Bonzini if (!deleted) { 126c2b38b27SPaolo Bonzini return ret; 127c2b38b27SPaolo Bonzini } 128c2b38b27SPaolo Bonzini 129bd451435SPaolo Bonzini if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { 130c2b38b27SPaolo Bonzini bhp = &ctx->first_bh; 131c2b38b27SPaolo Bonzini while (*bhp) { 132c2b38b27SPaolo Bonzini bh = *bhp; 133c2b38b27SPaolo Bonzini if (bh->deleted && !bh->scheduled) { 134c2b38b27SPaolo Bonzini *bhp = bh->next; 135c2b38b27SPaolo Bonzini g_free(bh); 136c2b38b27SPaolo Bonzini } else { 137c2b38b27SPaolo Bonzini bhp = &bh->next; 138c2b38b27SPaolo Bonzini } 139c2b38b27SPaolo Bonzini } 140bd451435SPaolo Bonzini qemu_lockcnt_inc_and_unlock(&ctx->list_lock); 141c2b38b27SPaolo Bonzini } 142c2b38b27SPaolo Bonzini return ret; 143c2b38b27SPaolo Bonzini } 144c2b38b27SPaolo Bonzini 145c2b38b27SPaolo Bonzini void qemu_bh_schedule_idle(QEMUBH *bh) 146c2b38b27SPaolo Bonzini { 147c2b38b27SPaolo Bonzini bh->idle = 1; 148c2b38b27SPaolo Bonzini /* Make sure that idle & any writes needed by the callback are done 149c2b38b27SPaolo Bonzini * before the locations are read in the aio_bh_poll. 150c2b38b27SPaolo Bonzini */ 151c2b38b27SPaolo Bonzini atomic_mb_set(&bh->scheduled, 1); 152c2b38b27SPaolo Bonzini } 153c2b38b27SPaolo Bonzini 154c2b38b27SPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh) 155c2b38b27SPaolo Bonzini { 156c2b38b27SPaolo Bonzini AioContext *ctx; 157c2b38b27SPaolo Bonzini 158c2b38b27SPaolo Bonzini ctx = bh->ctx; 159c2b38b27SPaolo Bonzini bh->idle = 0; 160c2b38b27SPaolo Bonzini /* The memory barrier implicit in atomic_xchg makes sure that: 161c2b38b27SPaolo Bonzini * 1. idle & any writes needed by the callback are done before the 162c2b38b27SPaolo Bonzini * locations are read in the aio_bh_poll. 163c2b38b27SPaolo Bonzini * 2. ctx is loaded before scheduled is set and the callback has a chance 164c2b38b27SPaolo Bonzini * to execute. 165c2b38b27SPaolo Bonzini */ 166c2b38b27SPaolo Bonzini if (atomic_xchg(&bh->scheduled, 1) == 0) { 167c2b38b27SPaolo Bonzini aio_notify(ctx); 168c2b38b27SPaolo Bonzini } 169c2b38b27SPaolo Bonzini } 170c2b38b27SPaolo Bonzini 171c2b38b27SPaolo Bonzini 172c2b38b27SPaolo Bonzini /* This func is async. 173c2b38b27SPaolo Bonzini */ 174c2b38b27SPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh) 175c2b38b27SPaolo Bonzini { 176ef6dada8SSergio Lopez atomic_mb_set(&bh->scheduled, 0); 177c2b38b27SPaolo Bonzini } 178c2b38b27SPaolo Bonzini 179c2b38b27SPaolo Bonzini /* This func is async.The bottom half will do the delete action at the finial 180c2b38b27SPaolo Bonzini * end. 181c2b38b27SPaolo Bonzini */ 182c2b38b27SPaolo Bonzini void qemu_bh_delete(QEMUBH *bh) 183c2b38b27SPaolo Bonzini { 184c2b38b27SPaolo Bonzini bh->scheduled = 0; 185c2b38b27SPaolo Bonzini bh->deleted = 1; 186c2b38b27SPaolo Bonzini } 187c2b38b27SPaolo Bonzini 188c2b38b27SPaolo Bonzini int64_t 189c2b38b27SPaolo Bonzini aio_compute_timeout(AioContext *ctx) 190c2b38b27SPaolo Bonzini { 191c2b38b27SPaolo Bonzini int64_t deadline; 192c2b38b27SPaolo Bonzini int timeout = -1; 193c2b38b27SPaolo Bonzini QEMUBH *bh; 194c2b38b27SPaolo Bonzini 195c2b38b27SPaolo Bonzini for (bh = atomic_rcu_read(&ctx->first_bh); bh; 196c2b38b27SPaolo Bonzini bh = atomic_rcu_read(&bh->next)) { 197c2b38b27SPaolo Bonzini if (bh->scheduled) { 198c2b38b27SPaolo Bonzini if (bh->idle) { 199c2b38b27SPaolo Bonzini /* idle bottom halves will be polled at least 200c2b38b27SPaolo Bonzini * every 10ms */ 201c2b38b27SPaolo Bonzini timeout = 10000000; 202c2b38b27SPaolo Bonzini } else { 203c2b38b27SPaolo Bonzini /* non-idle bottom halves will be executed 204c2b38b27SPaolo Bonzini * immediately */ 205c2b38b27SPaolo Bonzini return 0; 206c2b38b27SPaolo Bonzini } 207c2b38b27SPaolo Bonzini } 208c2b38b27SPaolo Bonzini } 209c2b38b27SPaolo Bonzini 210c2b38b27SPaolo Bonzini deadline = timerlistgroup_deadline_ns(&ctx->tlg); 211c2b38b27SPaolo Bonzini if (deadline == 0) { 212c2b38b27SPaolo Bonzini return 0; 213c2b38b27SPaolo Bonzini } else { 214c2b38b27SPaolo Bonzini return qemu_soonest_timeout(timeout, deadline); 215c2b38b27SPaolo Bonzini } 216c2b38b27SPaolo Bonzini } 217c2b38b27SPaolo Bonzini 218c2b38b27SPaolo Bonzini static gboolean 219c2b38b27SPaolo Bonzini aio_ctx_prepare(GSource *source, gint *timeout) 220c2b38b27SPaolo Bonzini { 221c2b38b27SPaolo Bonzini AioContext *ctx = (AioContext *) source; 222c2b38b27SPaolo Bonzini 223c2b38b27SPaolo Bonzini atomic_or(&ctx->notify_me, 1); 224c2b38b27SPaolo Bonzini 225c2b38b27SPaolo Bonzini /* We assume there is no timeout already supplied */ 226c2b38b27SPaolo Bonzini *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); 227c2b38b27SPaolo Bonzini 228c2b38b27SPaolo Bonzini if (aio_prepare(ctx)) { 229c2b38b27SPaolo Bonzini *timeout = 0; 230c2b38b27SPaolo Bonzini } 231c2b38b27SPaolo Bonzini 232c2b38b27SPaolo Bonzini return *timeout == 0; 233c2b38b27SPaolo Bonzini } 234c2b38b27SPaolo Bonzini 235c2b38b27SPaolo Bonzini static gboolean 236c2b38b27SPaolo Bonzini aio_ctx_check(GSource *source) 237c2b38b27SPaolo Bonzini { 238c2b38b27SPaolo Bonzini AioContext *ctx = (AioContext *) source; 239c2b38b27SPaolo Bonzini QEMUBH *bh; 240c2b38b27SPaolo Bonzini 241c2b38b27SPaolo Bonzini atomic_and(&ctx->notify_me, ~1); 242c2b38b27SPaolo Bonzini aio_notify_accept(ctx); 243c2b38b27SPaolo Bonzini 244c2b38b27SPaolo Bonzini for (bh = ctx->first_bh; bh; bh = bh->next) { 245c2b38b27SPaolo Bonzini if (bh->scheduled) { 246c2b38b27SPaolo Bonzini return true; 247c2b38b27SPaolo Bonzini } 248c2b38b27SPaolo Bonzini } 249c2b38b27SPaolo Bonzini return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0); 250c2b38b27SPaolo Bonzini } 251c2b38b27SPaolo Bonzini 252c2b38b27SPaolo Bonzini static gboolean 253c2b38b27SPaolo Bonzini aio_ctx_dispatch(GSource *source, 254c2b38b27SPaolo Bonzini GSourceFunc callback, 255c2b38b27SPaolo Bonzini gpointer user_data) 256c2b38b27SPaolo Bonzini { 257c2b38b27SPaolo Bonzini AioContext *ctx = (AioContext *) source; 258c2b38b27SPaolo Bonzini 259c2b38b27SPaolo Bonzini assert(callback == NULL); 260a153bf52SPaolo Bonzini aio_dispatch(ctx); 261c2b38b27SPaolo Bonzini return true; 262c2b38b27SPaolo Bonzini } 263c2b38b27SPaolo Bonzini 264c2b38b27SPaolo Bonzini static void 265c2b38b27SPaolo Bonzini aio_ctx_finalize(GSource *source) 266c2b38b27SPaolo Bonzini { 267c2b38b27SPaolo Bonzini AioContext *ctx = (AioContext *) source; 268c2b38b27SPaolo Bonzini 269c2b38b27SPaolo Bonzini thread_pool_free(ctx->thread_pool); 270c2b38b27SPaolo Bonzini 271c2b38b27SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 272c2b38b27SPaolo Bonzini if (ctx->linux_aio) { 273c2b38b27SPaolo Bonzini laio_detach_aio_context(ctx->linux_aio, ctx); 274c2b38b27SPaolo Bonzini laio_cleanup(ctx->linux_aio); 275c2b38b27SPaolo Bonzini ctx->linux_aio = NULL; 276c2b38b27SPaolo Bonzini } 277c2b38b27SPaolo Bonzini #endif 278c2b38b27SPaolo Bonzini 279*fcb7a4a4SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING 280*fcb7a4a4SAarushi Mehta if (ctx->linux_io_uring) { 281*fcb7a4a4SAarushi Mehta luring_detach_aio_context(ctx->linux_io_uring, ctx); 282*fcb7a4a4SAarushi Mehta luring_cleanup(ctx->linux_io_uring); 283*fcb7a4a4SAarushi Mehta ctx->linux_io_uring = NULL; 284*fcb7a4a4SAarushi Mehta } 285*fcb7a4a4SAarushi Mehta #endif 286*fcb7a4a4SAarushi Mehta 2870c330a73SPaolo Bonzini assert(QSLIST_EMPTY(&ctx->scheduled_coroutines)); 2880c330a73SPaolo Bonzini qemu_bh_delete(ctx->co_schedule_bh); 2890c330a73SPaolo Bonzini 290c2b38b27SPaolo Bonzini qemu_lockcnt_lock(&ctx->list_lock); 291c2b38b27SPaolo Bonzini assert(!qemu_lockcnt_count(&ctx->list_lock)); 292c2b38b27SPaolo Bonzini while (ctx->first_bh) { 293c2b38b27SPaolo Bonzini QEMUBH *next = ctx->first_bh->next; 294c2b38b27SPaolo Bonzini 295c2b38b27SPaolo Bonzini /* qemu_bh_delete() must have been called on BHs in this AioContext */ 296c2b38b27SPaolo Bonzini assert(ctx->first_bh->deleted); 297c2b38b27SPaolo Bonzini 298c2b38b27SPaolo Bonzini g_free(ctx->first_bh); 299c2b38b27SPaolo Bonzini ctx->first_bh = next; 300c2b38b27SPaolo Bonzini } 301c2b38b27SPaolo Bonzini qemu_lockcnt_unlock(&ctx->list_lock); 302c2b38b27SPaolo Bonzini 303c2b38b27SPaolo Bonzini aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL); 304c2b38b27SPaolo Bonzini event_notifier_cleanup(&ctx->notifier); 305c2b38b27SPaolo Bonzini qemu_rec_mutex_destroy(&ctx->lock); 306c2b38b27SPaolo Bonzini qemu_lockcnt_destroy(&ctx->list_lock); 307c2b38b27SPaolo Bonzini timerlistgroup_deinit(&ctx->tlg); 308cd0a6d2bSJie Wang aio_context_destroy(ctx); 309c2b38b27SPaolo Bonzini } 310c2b38b27SPaolo Bonzini 311c2b38b27SPaolo Bonzini static GSourceFuncs aio_source_funcs = { 312c2b38b27SPaolo Bonzini aio_ctx_prepare, 313c2b38b27SPaolo Bonzini aio_ctx_check, 314c2b38b27SPaolo Bonzini aio_ctx_dispatch, 315c2b38b27SPaolo Bonzini aio_ctx_finalize 316c2b38b27SPaolo Bonzini }; 317c2b38b27SPaolo Bonzini 318c2b38b27SPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx) 319c2b38b27SPaolo Bonzini { 320c2b38b27SPaolo Bonzini g_source_ref(&ctx->source); 321c2b38b27SPaolo Bonzini return &ctx->source; 322c2b38b27SPaolo Bonzini } 323c2b38b27SPaolo Bonzini 324c2b38b27SPaolo Bonzini ThreadPool *aio_get_thread_pool(AioContext *ctx) 325c2b38b27SPaolo Bonzini { 326c2b38b27SPaolo Bonzini if (!ctx->thread_pool) { 327c2b38b27SPaolo Bonzini ctx->thread_pool = thread_pool_new(ctx); 328c2b38b27SPaolo Bonzini } 329c2b38b27SPaolo Bonzini return ctx->thread_pool; 330c2b38b27SPaolo Bonzini } 331c2b38b27SPaolo Bonzini 332c2b38b27SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 333ed6e2161SNishanth Aravamudan LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp) 334c2b38b27SPaolo Bonzini { 335c2b38b27SPaolo Bonzini if (!ctx->linux_aio) { 336ed6e2161SNishanth Aravamudan ctx->linux_aio = laio_init(errp); 337ed6e2161SNishanth Aravamudan if (ctx->linux_aio) { 338c2b38b27SPaolo Bonzini laio_attach_aio_context(ctx->linux_aio, ctx); 339c2b38b27SPaolo Bonzini } 340ed6e2161SNishanth Aravamudan } 341ed6e2161SNishanth Aravamudan return ctx->linux_aio; 342ed6e2161SNishanth Aravamudan } 343ed6e2161SNishanth Aravamudan 344ed6e2161SNishanth Aravamudan LinuxAioState *aio_get_linux_aio(AioContext *ctx) 345ed6e2161SNishanth Aravamudan { 346ed6e2161SNishanth Aravamudan assert(ctx->linux_aio); 347c2b38b27SPaolo Bonzini return ctx->linux_aio; 348c2b38b27SPaolo Bonzini } 349c2b38b27SPaolo Bonzini #endif 350c2b38b27SPaolo Bonzini 351*fcb7a4a4SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING 352*fcb7a4a4SAarushi Mehta LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp) 353*fcb7a4a4SAarushi Mehta { 354*fcb7a4a4SAarushi Mehta if (ctx->linux_io_uring) { 355*fcb7a4a4SAarushi Mehta return ctx->linux_io_uring; 356*fcb7a4a4SAarushi Mehta } 357*fcb7a4a4SAarushi Mehta 358*fcb7a4a4SAarushi Mehta ctx->linux_io_uring = luring_init(errp); 359*fcb7a4a4SAarushi Mehta if (!ctx->linux_io_uring) { 360*fcb7a4a4SAarushi Mehta return NULL; 361*fcb7a4a4SAarushi Mehta } 362*fcb7a4a4SAarushi Mehta 363*fcb7a4a4SAarushi Mehta luring_attach_aio_context(ctx->linux_io_uring, ctx); 364*fcb7a4a4SAarushi Mehta return ctx->linux_io_uring; 365*fcb7a4a4SAarushi Mehta } 366*fcb7a4a4SAarushi Mehta 367*fcb7a4a4SAarushi Mehta LuringState *aio_get_linux_io_uring(AioContext *ctx) 368*fcb7a4a4SAarushi Mehta { 369*fcb7a4a4SAarushi Mehta assert(ctx->linux_io_uring); 370*fcb7a4a4SAarushi Mehta return ctx->linux_io_uring; 371*fcb7a4a4SAarushi Mehta } 372*fcb7a4a4SAarushi Mehta #endif 373*fcb7a4a4SAarushi Mehta 374c2b38b27SPaolo Bonzini void aio_notify(AioContext *ctx) 375c2b38b27SPaolo Bonzini { 376c2b38b27SPaolo Bonzini /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs 377c2b38b27SPaolo Bonzini * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll. 378c2b38b27SPaolo Bonzini */ 379c2b38b27SPaolo Bonzini smp_mb(); 380c2b38b27SPaolo Bonzini if (ctx->notify_me) { 381c2b38b27SPaolo Bonzini event_notifier_set(&ctx->notifier); 382c2b38b27SPaolo Bonzini atomic_mb_set(&ctx->notified, true); 383c2b38b27SPaolo Bonzini } 384c2b38b27SPaolo Bonzini } 385c2b38b27SPaolo Bonzini 386c2b38b27SPaolo Bonzini void aio_notify_accept(AioContext *ctx) 387c2b38b27SPaolo Bonzini { 388873df2ceSMarc-André Lureau if (atomic_xchg(&ctx->notified, false) 389873df2ceSMarc-André Lureau #ifdef WIN32 390873df2ceSMarc-André Lureau || true 391873df2ceSMarc-André Lureau #endif 392873df2ceSMarc-André Lureau ) { 393c2b38b27SPaolo Bonzini event_notifier_test_and_clear(&ctx->notifier); 394c2b38b27SPaolo Bonzini } 395c2b38b27SPaolo Bonzini } 396c2b38b27SPaolo Bonzini 3973f53bc61SPaolo Bonzini static void aio_timerlist_notify(void *opaque, QEMUClockType type) 398c2b38b27SPaolo Bonzini { 399c2b38b27SPaolo Bonzini aio_notify(opaque); 400c2b38b27SPaolo Bonzini } 401c2b38b27SPaolo Bonzini 402c2b38b27SPaolo Bonzini static void event_notifier_dummy_cb(EventNotifier *e) 403c2b38b27SPaolo Bonzini { 404c2b38b27SPaolo Bonzini } 405c2b38b27SPaolo Bonzini 406c2b38b27SPaolo Bonzini /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */ 407c2b38b27SPaolo Bonzini static bool event_notifier_poll(void *opaque) 408c2b38b27SPaolo Bonzini { 409c2b38b27SPaolo Bonzini EventNotifier *e = opaque; 410c2b38b27SPaolo Bonzini AioContext *ctx = container_of(e, AioContext, notifier); 411c2b38b27SPaolo Bonzini 412c2b38b27SPaolo Bonzini return atomic_read(&ctx->notified); 413c2b38b27SPaolo Bonzini } 414c2b38b27SPaolo Bonzini 4150c330a73SPaolo Bonzini static void co_schedule_bh_cb(void *opaque) 4160c330a73SPaolo Bonzini { 4170c330a73SPaolo Bonzini AioContext *ctx = opaque; 4180c330a73SPaolo Bonzini QSLIST_HEAD(, Coroutine) straight, reversed; 4190c330a73SPaolo Bonzini 4200c330a73SPaolo Bonzini QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines); 4210c330a73SPaolo Bonzini QSLIST_INIT(&straight); 4220c330a73SPaolo Bonzini 4230c330a73SPaolo Bonzini while (!QSLIST_EMPTY(&reversed)) { 4240c330a73SPaolo Bonzini Coroutine *co = QSLIST_FIRST(&reversed); 4250c330a73SPaolo Bonzini QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next); 4260c330a73SPaolo Bonzini QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next); 4270c330a73SPaolo Bonzini } 4280c330a73SPaolo Bonzini 4290c330a73SPaolo Bonzini while (!QSLIST_EMPTY(&straight)) { 4300c330a73SPaolo Bonzini Coroutine *co = QSLIST_FIRST(&straight); 4310c330a73SPaolo Bonzini QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); 4320c330a73SPaolo Bonzini trace_aio_co_schedule_bh_cb(ctx, co); 4331919631eSPaolo Bonzini aio_context_acquire(ctx); 4346133b39fSJeff Cody 4356133b39fSJeff Cody /* Protected by write barrier in qemu_aio_coroutine_enter */ 4366133b39fSJeff Cody atomic_set(&co->scheduled, NULL); 4376808ae04SSergio Lopez qemu_aio_coroutine_enter(ctx, co); 4381919631eSPaolo Bonzini aio_context_release(ctx); 4390c330a73SPaolo Bonzini } 4400c330a73SPaolo Bonzini } 4410c330a73SPaolo Bonzini 442c2b38b27SPaolo Bonzini AioContext *aio_context_new(Error **errp) 443c2b38b27SPaolo Bonzini { 444c2b38b27SPaolo Bonzini int ret; 445c2b38b27SPaolo Bonzini AioContext *ctx; 446c2b38b27SPaolo Bonzini 447c2b38b27SPaolo Bonzini ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); 448c2b38b27SPaolo Bonzini aio_context_setup(ctx); 449c2b38b27SPaolo Bonzini 450c2b38b27SPaolo Bonzini ret = event_notifier_init(&ctx->notifier, false); 451c2b38b27SPaolo Bonzini if (ret < 0) { 452c2b38b27SPaolo Bonzini error_setg_errno(errp, -ret, "Failed to initialize event notifier"); 453c2b38b27SPaolo Bonzini goto fail; 454c2b38b27SPaolo Bonzini } 455c2b38b27SPaolo Bonzini g_source_set_can_recurse(&ctx->source, true); 456c2b38b27SPaolo Bonzini qemu_lockcnt_init(&ctx->list_lock); 4570c330a73SPaolo Bonzini 4580c330a73SPaolo Bonzini ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx); 4590c330a73SPaolo Bonzini QSLIST_INIT(&ctx->scheduled_coroutines); 4600c330a73SPaolo Bonzini 461c2b38b27SPaolo Bonzini aio_set_event_notifier(ctx, &ctx->notifier, 462c2b38b27SPaolo Bonzini false, 463c2b38b27SPaolo Bonzini event_notifier_dummy_cb, 464c2b38b27SPaolo Bonzini event_notifier_poll); 465c2b38b27SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 466c2b38b27SPaolo Bonzini ctx->linux_aio = NULL; 467c2b38b27SPaolo Bonzini #endif 468*fcb7a4a4SAarushi Mehta 469*fcb7a4a4SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING 470*fcb7a4a4SAarushi Mehta ctx->linux_io_uring = NULL; 471*fcb7a4a4SAarushi Mehta #endif 472*fcb7a4a4SAarushi Mehta 473c2b38b27SPaolo Bonzini ctx->thread_pool = NULL; 474c2b38b27SPaolo Bonzini qemu_rec_mutex_init(&ctx->lock); 475c2b38b27SPaolo Bonzini timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); 476c2b38b27SPaolo Bonzini 477c2b38b27SPaolo Bonzini ctx->poll_ns = 0; 478c2b38b27SPaolo Bonzini ctx->poll_max_ns = 0; 479c2b38b27SPaolo Bonzini ctx->poll_grow = 0; 480c2b38b27SPaolo Bonzini ctx->poll_shrink = 0; 481c2b38b27SPaolo Bonzini 482c2b38b27SPaolo Bonzini return ctx; 483c2b38b27SPaolo Bonzini fail: 484c2b38b27SPaolo Bonzini g_source_destroy(&ctx->source); 485c2b38b27SPaolo Bonzini return NULL; 486c2b38b27SPaolo Bonzini } 487c2b38b27SPaolo Bonzini 4880c330a73SPaolo Bonzini void aio_co_schedule(AioContext *ctx, Coroutine *co) 4890c330a73SPaolo Bonzini { 4900c330a73SPaolo Bonzini trace_aio_co_schedule(ctx, co); 4916133b39fSJeff Cody const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL, 4926133b39fSJeff Cody __func__); 4936133b39fSJeff Cody 4946133b39fSJeff Cody if (scheduled) { 4956133b39fSJeff Cody fprintf(stderr, 4966133b39fSJeff Cody "%s: Co-routine was already scheduled in '%s'\n", 4976133b39fSJeff Cody __func__, scheduled); 4986133b39fSJeff Cody abort(); 4996133b39fSJeff Cody } 5006133b39fSJeff Cody 501f0f81002SStefan Hajnoczi /* The coroutine might run and release the last ctx reference before we 502f0f81002SStefan Hajnoczi * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until 503f0f81002SStefan Hajnoczi * we're done. 504f0f81002SStefan Hajnoczi */ 505f0f81002SStefan Hajnoczi aio_context_ref(ctx); 506f0f81002SStefan Hajnoczi 5070c330a73SPaolo Bonzini QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines, 5080c330a73SPaolo Bonzini co, co_scheduled_next); 5090c330a73SPaolo Bonzini qemu_bh_schedule(ctx->co_schedule_bh); 510f0f81002SStefan Hajnoczi 511f0f81002SStefan Hajnoczi aio_context_unref(ctx); 5120c330a73SPaolo Bonzini } 5130c330a73SPaolo Bonzini 5140c330a73SPaolo Bonzini void aio_co_wake(struct Coroutine *co) 5150c330a73SPaolo Bonzini { 5160c330a73SPaolo Bonzini AioContext *ctx; 5170c330a73SPaolo Bonzini 5180c330a73SPaolo Bonzini /* Read coroutine before co->ctx. Matches smp_wmb in 5190c330a73SPaolo Bonzini * qemu_coroutine_enter. 5200c330a73SPaolo Bonzini */ 5210c330a73SPaolo Bonzini smp_read_barrier_depends(); 5220c330a73SPaolo Bonzini ctx = atomic_read(&co->ctx); 5230c330a73SPaolo Bonzini 5248865852eSFam Zheng aio_co_enter(ctx, co); 5258865852eSFam Zheng } 5268865852eSFam Zheng 5278865852eSFam Zheng void aio_co_enter(AioContext *ctx, struct Coroutine *co) 5288865852eSFam Zheng { 5290c330a73SPaolo Bonzini if (ctx != qemu_get_current_aio_context()) { 5300c330a73SPaolo Bonzini aio_co_schedule(ctx, co); 5310c330a73SPaolo Bonzini return; 5320c330a73SPaolo Bonzini } 5330c330a73SPaolo Bonzini 5340c330a73SPaolo Bonzini if (qemu_in_coroutine()) { 5350c330a73SPaolo Bonzini Coroutine *self = qemu_coroutine_self(); 5360c330a73SPaolo Bonzini assert(self != co); 5370c330a73SPaolo Bonzini QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); 5380c330a73SPaolo Bonzini } else { 5390c330a73SPaolo Bonzini aio_context_acquire(ctx); 5408865852eSFam Zheng qemu_aio_coroutine_enter(ctx, co); 5410c330a73SPaolo Bonzini aio_context_release(ctx); 5420c330a73SPaolo Bonzini } 5430c330a73SPaolo Bonzini } 5440c330a73SPaolo Bonzini 545c2b38b27SPaolo Bonzini void aio_context_ref(AioContext *ctx) 546c2b38b27SPaolo Bonzini { 547c2b38b27SPaolo Bonzini g_source_ref(&ctx->source); 548c2b38b27SPaolo Bonzini } 549c2b38b27SPaolo Bonzini 550c2b38b27SPaolo Bonzini void aio_context_unref(AioContext *ctx) 551c2b38b27SPaolo Bonzini { 552c2b38b27SPaolo Bonzini g_source_unref(&ctx->source); 553c2b38b27SPaolo Bonzini } 554c2b38b27SPaolo Bonzini 555c2b38b27SPaolo Bonzini void aio_context_acquire(AioContext *ctx) 556c2b38b27SPaolo Bonzini { 557c2b38b27SPaolo Bonzini qemu_rec_mutex_lock(&ctx->lock); 558c2b38b27SPaolo Bonzini } 559c2b38b27SPaolo Bonzini 560c2b38b27SPaolo Bonzini void aio_context_release(AioContext *ctx) 561c2b38b27SPaolo Bonzini { 562c2b38b27SPaolo Bonzini qemu_rec_mutex_unlock(&ctx->lock); 563c2b38b27SPaolo Bonzini } 564