1737e150eSPaolo Bonzini /* 2737e150eSPaolo Bonzini * QEMU aio implementation 3737e150eSPaolo Bonzini * 4737e150eSPaolo Bonzini * Copyright IBM, Corp. 2008 5737e150eSPaolo Bonzini * 6737e150eSPaolo Bonzini * Authors: 7737e150eSPaolo Bonzini * Anthony Liguori <aliguori@us.ibm.com> 8737e150eSPaolo Bonzini * 9737e150eSPaolo Bonzini * This work is licensed under the terms of the GNU GPL, version 2. See 10737e150eSPaolo Bonzini * the COPYING file in the top-level directory. 11737e150eSPaolo Bonzini * 12737e150eSPaolo Bonzini */ 13737e150eSPaolo Bonzini 14737e150eSPaolo Bonzini #ifndef QEMU_AIO_H 15737e150eSPaolo Bonzini #define QEMU_AIO_H 16737e150eSPaolo Bonzini 176a1751b7SAlex Bligh #include "qemu/typedefs.h" 18737e150eSPaolo Bonzini #include "qemu-common.h" 191de7afc9SPaolo Bonzini #include "qemu/queue.h" 201de7afc9SPaolo Bonzini #include "qemu/event_notifier.h" 21dcc772e2SLiu Ping Fan #include "qemu/thread.h" 2298563fc3SStefan Hajnoczi #include "qemu/rfifolock.h" 23dae21b98SAlex Bligh #include "qemu/timer.h" 24737e150eSPaolo Bonzini 257c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB; 26097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret); 27737e150eSPaolo Bonzini 28737e150eSPaolo Bonzini typedef struct AIOCBInfo { 297c84b1b8SMarkus Armbruster void (*cancel_async)(BlockAIOCB *acb); 307c84b1b8SMarkus Armbruster AioContext *(*get_aio_context)(BlockAIOCB *acb); 31737e150eSPaolo Bonzini size_t aiocb_size; 32737e150eSPaolo Bonzini } AIOCBInfo; 33737e150eSPaolo Bonzini 347c84b1b8SMarkus Armbruster struct BlockAIOCB { 35737e150eSPaolo Bonzini const AIOCBInfo *aiocb_info; 36737e150eSPaolo Bonzini BlockDriverState *bs; 37097310b5SMarkus Armbruster BlockCompletionFunc *cb; 38737e150eSPaolo Bonzini void *opaque; 39f197fe2bSFam Zheng int refcnt; 40737e150eSPaolo Bonzini }; 41737e150eSPaolo Bonzini 42737e150eSPaolo Bonzini void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 43097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 448007429aSFam Zheng void qemu_aio_unref(void *p); 45f197fe2bSFam Zheng void qemu_aio_ref(void *p); 46737e150eSPaolo Bonzini 47737e150eSPaolo Bonzini typedef struct AioHandler AioHandler; 48737e150eSPaolo Bonzini typedef void QEMUBHFunc(void *opaque); 49737e150eSPaolo Bonzini typedef void IOHandler(void *opaque); 50737e150eSPaolo Bonzini 516a1751b7SAlex Bligh struct AioContext { 52737e150eSPaolo Bonzini GSource source; 53737e150eSPaolo Bonzini 5498563fc3SStefan Hajnoczi /* Protects all fields from multi-threaded access */ 5598563fc3SStefan Hajnoczi RFifoLock lock; 5698563fc3SStefan Hajnoczi 57737e150eSPaolo Bonzini /* The list of registered AIO handlers */ 58737e150eSPaolo Bonzini QLIST_HEAD(, AioHandler) aio_handlers; 59737e150eSPaolo Bonzini 60737e150eSPaolo Bonzini /* This is a simple lock used to protect the aio_handlers list. 61737e150eSPaolo Bonzini * Specifically, it's used to ensure that no callbacks are removed while 62737e150eSPaolo Bonzini * we're walking and dispatching callbacks. 63737e150eSPaolo Bonzini */ 64737e150eSPaolo Bonzini int walking_handlers; 65737e150eSPaolo Bonzini 66*eabc9779SPaolo Bonzini /* Used to avoid unnecessary event_notifier_set calls in aio_notify; 67*eabc9779SPaolo Bonzini * accessed with atomic primitives. If this field is 0, everything 68*eabc9779SPaolo Bonzini * (file descriptors, bottom halves, timers) will be re-evaluated 69*eabc9779SPaolo Bonzini * before the next blocking poll(), thus the event_notifier_set call 70*eabc9779SPaolo Bonzini * can be skipped. If it is non-zero, you may need to wake up a 71*eabc9779SPaolo Bonzini * concurrent aio_poll or the glib main event loop, making 72*eabc9779SPaolo Bonzini * event_notifier_set necessary. 73*eabc9779SPaolo Bonzini * 74*eabc9779SPaolo Bonzini * Bit 0 is reserved for GSource usage of the AioContext, and is 1 75*eabc9779SPaolo Bonzini * between a call to aio_ctx_check and the next call to aio_ctx_dispatch. 76*eabc9779SPaolo Bonzini * Bits 1-31 simply count the number of active calls to aio_poll 77*eabc9779SPaolo Bonzini * that are in the prepare or poll phase. 78*eabc9779SPaolo Bonzini * 79*eabc9779SPaolo Bonzini * The GSource and aio_poll must use a different mechanism because 80*eabc9779SPaolo Bonzini * there is no certainty that a call to GSource's prepare callback 81*eabc9779SPaolo Bonzini * (via g_main_context_prepare) is indeed followed by check and 82*eabc9779SPaolo Bonzini * dispatch. It's not clear whether this would be a bug, but let's 83*eabc9779SPaolo Bonzini * play safe and allow it---it will just cause extra calls to 84*eabc9779SPaolo Bonzini * event_notifier_set until the next call to dispatch. 85*eabc9779SPaolo Bonzini * 86*eabc9779SPaolo Bonzini * Instead, the aio_poll calls include both the prepare and the 87*eabc9779SPaolo Bonzini * dispatch phase, hence a simple counter is enough for them. 880ceb849bSPaolo Bonzini */ 89*eabc9779SPaolo Bonzini uint32_t notify_me; 900ceb849bSPaolo Bonzini 91dcc772e2SLiu Ping Fan /* lock to protect between bh's adders and deleter */ 92dcc772e2SLiu Ping Fan QemuMutex bh_lock; 930ceb849bSPaolo Bonzini 94737e150eSPaolo Bonzini /* Anchor of the list of Bottom Halves belonging to the context */ 95737e150eSPaolo Bonzini struct QEMUBH *first_bh; 96737e150eSPaolo Bonzini 97737e150eSPaolo Bonzini /* A simple lock used to protect the first_bh list, and ensure that 98737e150eSPaolo Bonzini * no callbacks are removed while we're walking and dispatching callbacks. 99737e150eSPaolo Bonzini */ 100737e150eSPaolo Bonzini int walking_bh; 101737e150eSPaolo Bonzini 102737e150eSPaolo Bonzini /* Used for aio_notify. */ 103737e150eSPaolo Bonzini EventNotifier notifier; 1046b5f8762SStefan Hajnoczi 1059b34277dSStefan Hajnoczi /* Thread pool for performing work and receiving completion callbacks */ 1069b34277dSStefan Hajnoczi struct ThreadPool *thread_pool; 107dae21b98SAlex Bligh 108dae21b98SAlex Bligh /* TimerLists for calling timers - one per clock type */ 109dae21b98SAlex Bligh QEMUTimerListGroup tlg; 1106a1751b7SAlex Bligh }; 111737e150eSPaolo Bonzini 112737e150eSPaolo Bonzini /** 113737e150eSPaolo Bonzini * aio_context_new: Allocate a new AioContext. 114737e150eSPaolo Bonzini * 115737e150eSPaolo Bonzini * AioContext provide a mini event-loop that can be waited on synchronously. 116737e150eSPaolo Bonzini * They also provide bottom halves, a service to execute a piece of code 117737e150eSPaolo Bonzini * as soon as possible. 118737e150eSPaolo Bonzini */ 1192f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp); 120737e150eSPaolo Bonzini 121737e150eSPaolo Bonzini /** 122737e150eSPaolo Bonzini * aio_context_ref: 123737e150eSPaolo Bonzini * @ctx: The AioContext to operate on. 124737e150eSPaolo Bonzini * 125737e150eSPaolo Bonzini * Add a reference to an AioContext. 126737e150eSPaolo Bonzini */ 127737e150eSPaolo Bonzini void aio_context_ref(AioContext *ctx); 128737e150eSPaolo Bonzini 129737e150eSPaolo Bonzini /** 130737e150eSPaolo Bonzini * aio_context_unref: 131737e150eSPaolo Bonzini * @ctx: The AioContext to operate on. 132737e150eSPaolo Bonzini * 133737e150eSPaolo Bonzini * Drop a reference to an AioContext. 134737e150eSPaolo Bonzini */ 135737e150eSPaolo Bonzini void aio_context_unref(AioContext *ctx); 136737e150eSPaolo Bonzini 13798563fc3SStefan Hajnoczi /* Take ownership of the AioContext. If the AioContext will be shared between 13849110174SPaolo Bonzini * threads, and a thread does not want to be interrupted, it will have to 13949110174SPaolo Bonzini * take ownership around calls to aio_poll(). Otherwise, aio_poll() 14049110174SPaolo Bonzini * automatically takes care of calling aio_context_acquire and 14149110174SPaolo Bonzini * aio_context_release. 14298563fc3SStefan Hajnoczi * 14349110174SPaolo Bonzini * Access to timers and BHs from a thread that has not acquired AioContext 14449110174SPaolo Bonzini * is possible. Access to callbacks for now must be done while the AioContext 14549110174SPaolo Bonzini * is owned by the thread (FIXME). 14698563fc3SStefan Hajnoczi */ 14798563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx); 14898563fc3SStefan Hajnoczi 14998563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */ 15098563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx); 15198563fc3SStefan Hajnoczi 152737e150eSPaolo Bonzini /** 153737e150eSPaolo Bonzini * aio_bh_new: Allocate a new bottom half structure. 154737e150eSPaolo Bonzini * 155737e150eSPaolo Bonzini * Bottom halves are lightweight callbacks whose invocation is guaranteed 156737e150eSPaolo Bonzini * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure 157737e150eSPaolo Bonzini * is opaque and must be allocated prior to its use. 158737e150eSPaolo Bonzini */ 159737e150eSPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 160737e150eSPaolo Bonzini 161737e150eSPaolo Bonzini /** 162737e150eSPaolo Bonzini * aio_notify: Force processing of pending events. 163737e150eSPaolo Bonzini * 164737e150eSPaolo Bonzini * Similar to signaling a condition variable, aio_notify forces 165737e150eSPaolo Bonzini * aio_wait to exit, so that the next call will re-examine pending events. 166737e150eSPaolo Bonzini * The caller of aio_notify will usually call aio_wait again very soon, 167737e150eSPaolo Bonzini * or go through another iteration of the GLib main loop. Hence, aio_notify 168737e150eSPaolo Bonzini * also has the side effect of recalculating the sets of file descriptors 169737e150eSPaolo Bonzini * that the main loop waits for. 170737e150eSPaolo Bonzini * 171737e150eSPaolo Bonzini * Calling aio_notify is rarely necessary, because for example scheduling 172737e150eSPaolo Bonzini * a bottom half calls it already. 173737e150eSPaolo Bonzini */ 174737e150eSPaolo Bonzini void aio_notify(AioContext *ctx); 175737e150eSPaolo Bonzini 176737e150eSPaolo Bonzini /** 177737e150eSPaolo Bonzini * aio_bh_poll: Poll bottom halves for an AioContext. 178737e150eSPaolo Bonzini * 179737e150eSPaolo Bonzini * These are internal functions used by the QEMU main loop. 180dcc772e2SLiu Ping Fan * And notice that multiple occurrences of aio_bh_poll cannot 181dcc772e2SLiu Ping Fan * be called concurrently 182737e150eSPaolo Bonzini */ 183737e150eSPaolo Bonzini int aio_bh_poll(AioContext *ctx); 184737e150eSPaolo Bonzini 185737e150eSPaolo Bonzini /** 186737e150eSPaolo Bonzini * qemu_bh_schedule: Schedule a bottom half. 187737e150eSPaolo Bonzini * 188737e150eSPaolo Bonzini * Scheduling a bottom half interrupts the main loop and causes the 189737e150eSPaolo Bonzini * execution of the callback that was passed to qemu_bh_new. 190737e150eSPaolo Bonzini * 191737e150eSPaolo Bonzini * Bottom halves that are scheduled from a bottom half handler are instantly 192737e150eSPaolo Bonzini * invoked. This can create an infinite loop if a bottom half handler 193737e150eSPaolo Bonzini * schedules itself. 194737e150eSPaolo Bonzini * 195737e150eSPaolo Bonzini * @bh: The bottom half to be scheduled. 196737e150eSPaolo Bonzini */ 197737e150eSPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh); 198737e150eSPaolo Bonzini 199737e150eSPaolo Bonzini /** 200737e150eSPaolo Bonzini * qemu_bh_cancel: Cancel execution of a bottom half. 201737e150eSPaolo Bonzini * 202737e150eSPaolo Bonzini * Canceling execution of a bottom half undoes the effect of calls to 203737e150eSPaolo Bonzini * qemu_bh_schedule without freeing its resources yet. While cancellation 204737e150eSPaolo Bonzini * itself is also wait-free and thread-safe, it can of course race with the 205737e150eSPaolo Bonzini * loop that executes bottom halves unless you are holding the iothread 206737e150eSPaolo Bonzini * mutex. This makes it mostly useless if you are not holding the mutex. 207737e150eSPaolo Bonzini * 208737e150eSPaolo Bonzini * @bh: The bottom half to be canceled. 209737e150eSPaolo Bonzini */ 210737e150eSPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh); 211737e150eSPaolo Bonzini 212737e150eSPaolo Bonzini /** 213737e150eSPaolo Bonzini *qemu_bh_delete: Cancel execution of a bottom half and free its resources. 214737e150eSPaolo Bonzini * 215737e150eSPaolo Bonzini * Deleting a bottom half frees the memory that was allocated for it by 216737e150eSPaolo Bonzini * qemu_bh_new. It also implies canceling the bottom half if it was 217737e150eSPaolo Bonzini * scheduled. 218dcc772e2SLiu Ping Fan * This func is async. The bottom half will do the delete action at the finial 219dcc772e2SLiu Ping Fan * end. 220737e150eSPaolo Bonzini * 221737e150eSPaolo Bonzini * @bh: The bottom half to be deleted. 222737e150eSPaolo Bonzini */ 223737e150eSPaolo Bonzini void qemu_bh_delete(QEMUBH *bh); 224737e150eSPaolo Bonzini 225737e150eSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 226a3462c65SPaolo Bonzini * attached to the AioContext, before g_poll is invoked. 227a3462c65SPaolo Bonzini * 228a3462c65SPaolo Bonzini * This is used internally in the implementation of the GSource. 229a3462c65SPaolo Bonzini */ 230a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx); 231a3462c65SPaolo Bonzini 232a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 233a3462c65SPaolo Bonzini * attached to the AioContext, after g_poll is invoked. 234737e150eSPaolo Bonzini * 235737e150eSPaolo Bonzini * This is used internally in the implementation of the GSource. 236737e150eSPaolo Bonzini */ 237737e150eSPaolo Bonzini bool aio_pending(AioContext *ctx); 238737e150eSPaolo Bonzini 239e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext. 240e4c7e2d1SPaolo Bonzini * 241e4c7e2d1SPaolo Bonzini * This is used internally in the implementation of the GSource. 242e4c7e2d1SPaolo Bonzini */ 243e4c7e2d1SPaolo Bonzini bool aio_dispatch(AioContext *ctx); 244e4c7e2d1SPaolo Bonzini 245737e150eSPaolo Bonzini /* Progress in completing AIO work to occur. This can issue new pending 246737e150eSPaolo Bonzini * aio as a result of executing I/O completion or bh callbacks. 247737e150eSPaolo Bonzini * 248acfb23adSPaolo Bonzini * Return whether any progress was made by executing AIO or bottom half 249acfb23adSPaolo Bonzini * handlers. If @blocking == true, this should always be true except 250acfb23adSPaolo Bonzini * if someone called aio_notify. 251737e150eSPaolo Bonzini * 252737e150eSPaolo Bonzini * If there are no pending bottom halves, but there are pending AIO 253737e150eSPaolo Bonzini * operations, it may not be possible to make any progress without 254737e150eSPaolo Bonzini * blocking. If @blocking is true, this function will wait until one 255737e150eSPaolo Bonzini * or more AIO events have completed, to ensure something has moved 256737e150eSPaolo Bonzini * before returning. 257737e150eSPaolo Bonzini */ 258737e150eSPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking); 259737e150eSPaolo Bonzini 260737e150eSPaolo Bonzini /* Register a file descriptor and associated callbacks. Behaves very similarly 2616484e422SFam Zheng * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will 26287f68d31SPaolo Bonzini * be invoked when using aio_poll(). 263737e150eSPaolo Bonzini * 264737e150eSPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 265737e150eSPaolo Bonzini * instead of qemu_set_fd_handler[2]. 266737e150eSPaolo Bonzini */ 267737e150eSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx, 268737e150eSPaolo Bonzini int fd, 269737e150eSPaolo Bonzini IOHandler *io_read, 270737e150eSPaolo Bonzini IOHandler *io_write, 271737e150eSPaolo Bonzini void *opaque); 272737e150eSPaolo Bonzini 273737e150eSPaolo Bonzini /* Register an event notifier and associated callbacks. Behaves very similarly 274737e150eSPaolo Bonzini * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks 27587f68d31SPaolo Bonzini * will be invoked when using aio_poll(). 276737e150eSPaolo Bonzini * 277737e150eSPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 278737e150eSPaolo Bonzini * instead of event_notifier_set_handler. 279737e150eSPaolo Bonzini */ 280737e150eSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx, 281737e150eSPaolo Bonzini EventNotifier *notifier, 282f2e5dca4SStefan Hajnoczi EventNotifierHandler *io_read); 283737e150eSPaolo Bonzini 284737e150eSPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached 285737e150eSPaolo Bonzini * to this AioContext. 286737e150eSPaolo Bonzini */ 287737e150eSPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx); 288737e150eSPaolo Bonzini 2899b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */ 2909b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx); 2919b34277dSStefan Hajnoczi 2924e29e831SAlex Bligh /** 2934e29e831SAlex Bligh * aio_timer_new: 2944e29e831SAlex Bligh * @ctx: the aio context 2954e29e831SAlex Bligh * @type: the clock type 2964e29e831SAlex Bligh * @scale: the scale 2974e29e831SAlex Bligh * @cb: the callback to call on timer expiry 2984e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 2994e29e831SAlex Bligh * 3004e29e831SAlex Bligh * Allocate a new timer attached to the context @ctx. 3014e29e831SAlex Bligh * The function is responsible for memory allocation. 3024e29e831SAlex Bligh * 3034e29e831SAlex Bligh * The preferred interface is aio_timer_init. Use that 3044e29e831SAlex Bligh * unless you really need dynamic memory allocation. 3054e29e831SAlex Bligh * 3064e29e831SAlex Bligh * Returns: a pointer to the new timer 3074e29e831SAlex Bligh */ 3084e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, 3094e29e831SAlex Bligh int scale, 3104e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 3114e29e831SAlex Bligh { 3124e29e831SAlex Bligh return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); 3134e29e831SAlex Bligh } 3144e29e831SAlex Bligh 3154e29e831SAlex Bligh /** 3164e29e831SAlex Bligh * aio_timer_init: 3174e29e831SAlex Bligh * @ctx: the aio context 3184e29e831SAlex Bligh * @ts: the timer 3194e29e831SAlex Bligh * @type: the clock type 3204e29e831SAlex Bligh * @scale: the scale 3214e29e831SAlex Bligh * @cb: the callback to call on timer expiry 3224e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 3234e29e831SAlex Bligh * 3244e29e831SAlex Bligh * Initialise a new timer attached to the context @ctx. 3254e29e831SAlex Bligh * The caller is responsible for memory allocation. 3264e29e831SAlex Bligh */ 3274e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx, 3284e29e831SAlex Bligh QEMUTimer *ts, QEMUClockType type, 3294e29e831SAlex Bligh int scale, 3304e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 3314e29e831SAlex Bligh { 332f186aa97SPaolo Bonzini timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque); 3334e29e831SAlex Bligh } 3344e29e831SAlex Bligh 335845ca10dSPaolo Bonzini /** 336845ca10dSPaolo Bonzini * aio_compute_timeout: 337845ca10dSPaolo Bonzini * @ctx: the aio context 338845ca10dSPaolo Bonzini * 339845ca10dSPaolo Bonzini * Compute the timeout that a blocking aio_poll should use. 340845ca10dSPaolo Bonzini */ 341845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx); 342845ca10dSPaolo Bonzini 343737e150eSPaolo Bonzini #endif 344