/*
 * Data plane event loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "trace.h"

/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */

/* QEMUBH::flags values */
enum {
    /* Already enqueued and waiting for aio_bh_poll() */
    BH_PENDING   = (1 << 0),

    /* Invoke the callback */
    BH_SCHEDULED = (1 << 1),

    /* Delete without invoking callback */
    BH_DELETED   = (1 << 2),

    /* Delete after invoking callback */
    BH_ONESHOT   = (1 << 3),

    /* Schedule periodically when the event loop is idle */
    BH_IDLE      = (1 << 4),
};

struct QEMUBH {
    AioContext *ctx;
    QEMUBHFunc *cb;
    void *opaque;
    QSLIST_ENTRY(QEMUBH) next;
    unsigned flags;
};
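/*
 * Typical bottom-half lifecycle, as a minimal sketch using the functions
 * defined below.  The callback name and opaque pointer are illustrative only,
 * not part of this file:
 *
 *     static void my_bh_cb(void *opaque) { ... }
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_bh_cb, opaque);
 *     qemu_bh_schedule(bh);    (sets BH_SCHEDULED; my_bh_cb runs on the next
 *                               aio_bh_poll() in ctx's home thread)
 *     ...
 *     qemu_bh_delete(bh);      (sets BH_DELETED; the memory is freed by
 *                               aio_bh_poll() or aio_ctx_finalize())
 */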
/* Called concurrently from any thread */
static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
{
    AioContext *ctx = bh->ctx;
    unsigned old_flags;

    /*
     * The memory barrier implicit in atomic_fetch_or makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before the callback has a chance to execute and bh
     *    could be freed.
     */
    old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
    if (!(old_flags & BH_PENDING)) {
        QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
    }

    aio_notify(ctx);
}
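/*
 * Illustrative summary of the entry points defined below and the flags they
 * pass to aio_bh_enqueue():
 *
 *     qemu_bh_schedule(bh)                  -> BH_SCHEDULED
 *     qemu_bh_schedule_idle(bh)             -> BH_SCHEDULED | BH_IDLE
 *     aio_bh_schedule_oneshot(ctx, cb, op)  -> BH_SCHEDULED | BH_ONESHOT
 *     qemu_bh_delete(bh)                    -> BH_DELETED
 *
 * BH_PENDING is ORed in unconditionally so that a bottom half is inserted
 * into ctx->bh_list at most once, no matter how often it is scheduled before
 * the next aio_bh_poll().
 */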
/* Only called from aio_bh_poll() and aio_ctx_finalize() */
static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
{
    QEMUBH *bh = QSLIST_FIRST_RCU(head);

    if (!bh) {
        return NULL;
    }

    QSLIST_REMOVE_HEAD(head, next);

    /*
     * The atomic_and is paired with aio_bh_enqueue().  The implicit memory
     * barrier ensures that the callback sees all writes done by the scheduling
     * thread.  It also ensures that the scheduling thread sees the cleared
     * flag before bh->cb has run, and thus will call aio_notify again if
     * necessary.
     */
    *flags = atomic_fetch_and(&bh->flags,
                              ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
    return bh;
}

void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
}

QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    return bh;
}

void aio_bh_call(QEMUBH *bh)
{
    bh->cb(bh->opaque);
}

/* Multiple aio_bh_poll() calls must not run concurrently. */
int aio_bh_poll(AioContext *ctx)
{
    BHListSlice slice;
    BHListSlice *s;
    int ret = 0;

    QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
    QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);

    while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
        QEMUBH *bh;
        unsigned flags;

        bh = aio_bh_dequeue(&s->bh_list, &flags);
        if (!bh) {
            QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
            continue;
        }

        if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
            /* Idle BHs don't count as progress */
            if (!(flags & BH_IDLE)) {
                ret = 1;
            }
            aio_bh_call(bh);
        }
        if (flags & (BH_DELETED | BH_ONESHOT)) {
            g_free(bh);
        }
    }

    return ret;
}

void qemu_bh_schedule_idle(QEMUBH *bh)
{
    aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
}

void qemu_bh_schedule(QEMUBH *bh)
{
    aio_bh_enqueue(bh, BH_SCHEDULED);
}
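/*
 * Because aio_bh_enqueue() may be called from any thread, scheduling a bottom
 * half is the usual way to hand work to an event loop from elsewhere; the
 * aio_notify() call inside aio_bh_enqueue() wakes the loop if it is blocked.
 * A minimal sketch (worker_done_cb and struct Job are illustrative names, not
 * part of this file):
 *
 *     static void worker_done_cb(void *opaque)
 *     {
 *         struct Job *job = opaque;    (runs in job->ctx's home thread)
 *         ...
 *     }
 *
 *     In a worker thread:
 *         aio_bh_schedule_oneshot(job->ctx, worker_done_cb, job);
 */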
/* This function is asynchronous. */
void qemu_bh_cancel(QEMUBH *bh)
{
    atomic_and(&bh->flags, ~BH_SCHEDULED);
}

/* This function is asynchronous; the bottom half is freed later, when
 * aio_bh_poll() (or aio_ctx_finalize()) processes the BH_DELETED flag.
 */
void qemu_bh_delete(QEMUBH *bh)
{
    aio_bh_enqueue(bh, BH_DELETED);
}

static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
{
    QEMUBH *bh;

    QSLIST_FOREACH_RCU(bh, head, next) {
        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
            if (bh->flags & BH_IDLE) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
                timeout = 10000000;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
                return 0;
            }
        }
    }

    return timeout;
}

int64_t
aio_compute_timeout(AioContext *ctx)
{
    BHListSlice *s;
    int64_t deadline;
    int timeout = -1;

    timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
    if (timeout == 0) {
        return 0;
    }

    QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
        timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
        if (timeout == 0) {
            return 0;
        }
    }

    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
    if (deadline == 0) {
        return 0;
    } else {
        return qemu_soonest_timeout(timeout, deadline);
    }
}
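/*
 * The value returned by aio_compute_timeout() is in nanoseconds.  Following
 * the code above: a runnable non-idle bottom half makes the result 0 (poll
 * without blocking), a pending idle bottom half caps it at 10 ms
 * (10000000 ns), and the result is then combined with the nearest timer
 * deadline from ctx->tlg via qemu_soonest_timeout().  With no runnable
 * bottom halves and no timers, the result is -1 (block indefinitely).
 */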
static gboolean
aio_ctx_prepare(GSource *source, gint *timeout)
{
    AioContext *ctx = (AioContext *) source;

    atomic_or(&ctx->notify_me, 1);

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

    if (aio_prepare(ctx)) {
        *timeout = 0;
    }

    return *timeout == 0;
}

static gboolean
aio_ctx_check(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;
    BHListSlice *s;

    atomic_and(&ctx->notify_me, ~1);
    aio_notify_accept(ctx);

    QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
        if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
            return true;
        }
    }

    QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
        QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
            if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
                return true;
            }
        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}

static gboolean
aio_ctx_dispatch(GSource *source,
                 GSourceFunc callback,
                 gpointer user_data)
{
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
    aio_dispatch(ctx);
    return true;
}

static void
aio_ctx_finalize(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;
    unsigned flags;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

#ifdef CONFIG_LINUX_IO_URING
    if (ctx->linux_io_uring) {
        luring_detach_aio_context(ctx->linux_io_uring, ctx);
        luring_cleanup(ctx->linux_io_uring);
        ctx->linux_io_uring = NULL;
    }
#endif

    assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
    qemu_bh_delete(ctx->co_schedule_bh);

    /* There must be no aio_bh_poll() calls going on */
    assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));

    while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(flags & BH_DELETED);

        g_free(bh);
    }

    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
    aio_context_destroy(ctx);
}

static GSourceFuncs aio_source_funcs = {
    aio_ctx_prepare,
    aio_ctx_check,
    aio_ctx_dispatch,
    aio_ctx_finalize
};

GSource *aio_get_g_source(AioContext *ctx)
{
    g_source_ref(&ctx->source);
    return &ctx->source;
}
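/*
 * Minimal sketch of driving an AioContext from a GLib main loop, using the
 * GSource returned above (error_abort and the default main context are just
 * one possible choice):
 *
 *     AioContext *ctx = aio_context_new(&error_abort);
 *     GSource *src = aio_get_g_source(ctx);
 *     g_source_attach(src, g_main_context_default());
 *     g_source_unref(src);
 *
 * GLib then invokes aio_ctx_prepare(), aio_ctx_check() and aio_ctx_dispatch()
 * through aio_source_funcs as part of its poll cycle.
 */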
ThreadPool *aio_get_thread_pool(AioContext *ctx)
{
    if (!ctx->thread_pool) {
        ctx->thread_pool = thread_pool_new(ctx);
    }
    return ctx->thread_pool;
}

#ifdef CONFIG_LINUX_AIO
LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
{
    if (!ctx->linux_aio) {
        ctx->linux_aio = laio_init(errp);
        if (ctx->linux_aio) {
            laio_attach_aio_context(ctx->linux_aio, ctx);
        }
    }
    return ctx->linux_aio;
}

LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
    assert(ctx->linux_aio);
    return ctx->linux_aio;
}
#endif

#ifdef CONFIG_LINUX_IO_URING
LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
{
    if (ctx->linux_io_uring) {
        return ctx->linux_io_uring;
    }

    ctx->linux_io_uring = luring_init(errp);
    if (!ctx->linux_io_uring) {
        return NULL;
    }

    luring_attach_aio_context(ctx->linux_io_uring, ctx);
    return ctx->linux_io_uring;
}

LuringState *aio_get_linux_io_uring(AioContext *ctx)
{
    assert(ctx->linux_io_uring);
    return ctx->linux_io_uring;
}
#endif
void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->flags before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}

void aio_notify_accept(AioContext *ctx)
{
    if (atomic_xchg(&ctx->notified, false)
#ifdef WIN32
        || true
#endif
    ) {
        event_notifier_test_and_clear(&ctx->notifier);
    }
}

static void aio_timerlist_notify(void *opaque, QEMUClockType type)
{
    aio_notify(opaque);
}

static void event_notifier_dummy_cb(EventNotifier *e)
{
}

/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
static bool event_notifier_poll(void *opaque)
{
    EventNotifier *e = opaque;
    AioContext *ctx = container_of(e, AioContext, notifier);

    return atomic_read(&ctx->notified);
}

static void co_schedule_bh_cb(void *opaque)
{
    AioContext *ctx = opaque;
    QSLIST_HEAD(, Coroutine) straight, reversed;

    QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
    QSLIST_INIT(&straight);

    while (!QSLIST_EMPTY(&reversed)) {
        Coroutine *co = QSLIST_FIRST(&reversed);
        QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
        QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
    }

    while (!QSLIST_EMPTY(&straight)) {
        Coroutine *co = QSLIST_FIRST(&straight);
        QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
        trace_aio_co_schedule_bh_cb(ctx, co);
        aio_context_acquire(ctx);

        /* Protected by write barrier in qemu_aio_coroutine_enter */
        atomic_set(&co->scheduled, NULL);
        qemu_aio_coroutine_enter(ctx, co);
        aio_context_release(ctx);
    }
}
AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    QSLIST_INIT(&ctx->bh_list);
    QSIMPLEQ_INIT(&ctx->bh_slice_list);
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);

    ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
    QSLIST_INIT(&ctx->scheduled_coroutines);

    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
#endif

#ifdef CONFIG_LINUX_IO_URING
    ctx->linux_io_uring = NULL;
#endif

    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    g_source_destroy(&ctx->source);
    return NULL;
}

void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
    trace_aio_co_schedule(ctx, co);
    const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
                                           __func__);

    if (scheduled) {
        fprintf(stderr,
                "%s: Co-routine was already scheduled in '%s'\n",
                __func__, scheduled);
        abort();
    }

    /* The coroutine might run and release the last ctx reference before we
     * invoke qemu_bh_schedule().  Take a reference to keep ctx alive until
     * we're done.
     */
    aio_context_ref(ctx);

    QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
                              co, co_scheduled_next);
    qemu_bh_schedule(ctx->co_schedule_bh);

    aio_context_unref(ctx);
}
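/*
 * Minimal usage sketch: run a coroutine in a specific AioContext's thread
 * (my_co_fn and arg are illustrative names, not part of this file):
 *
 *     Coroutine *co = qemu_coroutine_create(my_co_fn, arg);
 *     aio_co_schedule(ctx, co);
 *
 * Completion callbacks typically use aio_co_wake() (below) instead, which
 * re-enters the coroutine in the AioContext it last ran in.
 */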
void aio_co_wake(struct Coroutine *co)
{
    AioContext *ctx;

    /* Read coroutine before co->ctx.  Matches smp_wmb in
     * qemu_coroutine_enter.
     */
    smp_read_barrier_depends();
    ctx = atomic_read(&co->ctx);

    aio_co_enter(ctx, co);
}

void aio_co_enter(AioContext *ctx, struct Coroutine *co)
{
    if (ctx != qemu_get_current_aio_context()) {
        aio_co_schedule(ctx, co);
        return;
    }

    if (qemu_in_coroutine()) {
        Coroutine *self = qemu_coroutine_self();
        assert(self != co);
        QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
    } else {
        aio_context_acquire(ctx);
        qemu_aio_coroutine_enter(ctx, co);
        aio_context_release(ctx);
    }
}

void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}

void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}

void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}

void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}
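/*
 * The context lock is recursive (see qemu_rec_mutex_init() in
 * aio_context_new()), so nested acquisition from the same thread is safe.
 * A minimal sketch of the conventional bracket around code that touches
 * state owned by another thread's AioContext:
 *
 *     aio_context_acquire(ctx);
 *     ... operate on objects bound to ctx ...
 *     aio_context_release(ctx);
 */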