1737e150eSPaolo Bonzini /* 2737e150eSPaolo Bonzini * QEMU aio implementation 3737e150eSPaolo Bonzini * 4737e150eSPaolo Bonzini * Copyright IBM, Corp. 2008 5737e150eSPaolo Bonzini * 6737e150eSPaolo Bonzini * Authors: 7737e150eSPaolo Bonzini * Anthony Liguori <aliguori@us.ibm.com> 8737e150eSPaolo Bonzini * 9737e150eSPaolo Bonzini * This work is licensed under the terms of the GNU GPL, version 2. See 10737e150eSPaolo Bonzini * the COPYING file in the top-level directory. 11737e150eSPaolo Bonzini * 12737e150eSPaolo Bonzini */ 13737e150eSPaolo Bonzini 14737e150eSPaolo Bonzini #ifndef QEMU_AIO_H 15737e150eSPaolo Bonzini #define QEMU_AIO_H 16737e150eSPaolo Bonzini 171de7afc9SPaolo Bonzini #include "qemu/queue.h" 181de7afc9SPaolo Bonzini #include "qemu/event_notifier.h" 19dcc772e2SLiu Ping Fan #include "qemu/thread.h" 20dae21b98SAlex Bligh #include "qemu/timer.h" 21737e150eSPaolo Bonzini 227c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB; 23097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret); 24737e150eSPaolo Bonzini 25737e150eSPaolo Bonzini typedef struct AIOCBInfo { 267c84b1b8SMarkus Armbruster void (*cancel_async)(BlockAIOCB *acb); 277c84b1b8SMarkus Armbruster AioContext *(*get_aio_context)(BlockAIOCB *acb); 28737e150eSPaolo Bonzini size_t aiocb_size; 29737e150eSPaolo Bonzini } AIOCBInfo; 30737e150eSPaolo Bonzini 317c84b1b8SMarkus Armbruster struct BlockAIOCB { 32737e150eSPaolo Bonzini const AIOCBInfo *aiocb_info; 33737e150eSPaolo Bonzini BlockDriverState *bs; 34097310b5SMarkus Armbruster BlockCompletionFunc *cb; 35737e150eSPaolo Bonzini void *opaque; 36f197fe2bSFam Zheng int refcnt; 37737e150eSPaolo Bonzini }; 38737e150eSPaolo Bonzini 39737e150eSPaolo Bonzini void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 40097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 418007429aSFam Zheng void qemu_aio_unref(void *p); 42f197fe2bSFam Zheng void qemu_aio_ref(void *p); 43737e150eSPaolo Bonzini 44737e150eSPaolo Bonzini typedef struct AioHandler AioHandler; 454749079cSStefan Hajnoczi typedef QLIST_HEAD(, AioHandler) AioHandlerList; 46737e150eSPaolo Bonzini typedef void QEMUBHFunc(void *opaque); 47f6a51c84SStefan Hajnoczi typedef bool AioPollFn(void *opaque); 48737e150eSPaolo Bonzini typedef void IOHandler(void *opaque); 49737e150eSPaolo Bonzini 500c330a73SPaolo Bonzini struct Coroutine; 510187f5c9SPaolo Bonzini struct ThreadPool; 520187f5c9SPaolo Bonzini struct LinuxAioState; 536663a0a3SAarushi Mehta struct LuringState; 540187f5c9SPaolo Bonzini 55*1f050a46SStefan Hajnoczi /* Callbacks for file descriptor monitoring implementations */ 56*1f050a46SStefan Hajnoczi typedef struct { 57*1f050a46SStefan Hajnoczi /* 58*1f050a46SStefan Hajnoczi * update: 59*1f050a46SStefan Hajnoczi * @ctx: the AioContext 60*1f050a46SStefan Hajnoczi * @node: the handler 61*1f050a46SStefan Hajnoczi * @is_new: is the file descriptor already being monitored? 62*1f050a46SStefan Hajnoczi * 63*1f050a46SStefan Hajnoczi * Add/remove/modify a monitored file descriptor. There are three cases: 64*1f050a46SStefan Hajnoczi * 1. node->pfd.events == 0 means remove the file descriptor. 65*1f050a46SStefan Hajnoczi * 2. !is_new means modify an already monitored file descriptor. 66*1f050a46SStefan Hajnoczi * 3. is_new means add a new file descriptor. 67*1f050a46SStefan Hajnoczi * 68*1f050a46SStefan Hajnoczi * Called with ctx->list_lock acquired. 69*1f050a46SStefan Hajnoczi */ 70*1f050a46SStefan Hajnoczi void (*update)(AioContext *ctx, AioHandler *node, bool is_new); 71*1f050a46SStefan Hajnoczi 72*1f050a46SStefan Hajnoczi /* 73*1f050a46SStefan Hajnoczi * wait: 74*1f050a46SStefan Hajnoczi * @ctx: the AioContext 75*1f050a46SStefan Hajnoczi * @ready_list: list for handlers that become ready 76*1f050a46SStefan Hajnoczi * @timeout: maximum duration to wait, in nanoseconds 77*1f050a46SStefan Hajnoczi * 78*1f050a46SStefan Hajnoczi * Wait for file descriptors to become ready and place them on ready_list. 79*1f050a46SStefan Hajnoczi * 80*1f050a46SStefan Hajnoczi * Called with ctx->list_lock incremented but not locked. 81*1f050a46SStefan Hajnoczi * 82*1f050a46SStefan Hajnoczi * Returns: number of ready file descriptors. 83*1f050a46SStefan Hajnoczi */ 84*1f050a46SStefan Hajnoczi int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout); 85*1f050a46SStefan Hajnoczi } FDMonOps; 86*1f050a46SStefan Hajnoczi 878c6b0356SStefan Hajnoczi /* 888c6b0356SStefan Hajnoczi * Each aio_bh_poll() call carves off a slice of the BH list, so that newly 898c6b0356SStefan Hajnoczi * scheduled BHs are not processed until the next aio_bh_poll() call. All 908c6b0356SStefan Hajnoczi * active aio_bh_poll() calls chain their slices together in a list, so that 918c6b0356SStefan Hajnoczi * nested aio_bh_poll() calls process all scheduled bottom halves. 928c6b0356SStefan Hajnoczi */ 938c6b0356SStefan Hajnoczi typedef QSLIST_HEAD(, QEMUBH) BHList; 948c6b0356SStefan Hajnoczi typedef struct BHListSlice BHListSlice; 958c6b0356SStefan Hajnoczi struct BHListSlice { 968c6b0356SStefan Hajnoczi BHList bh_list; 978c6b0356SStefan Hajnoczi QSIMPLEQ_ENTRY(BHListSlice) next; 988c6b0356SStefan Hajnoczi }; 998c6b0356SStefan Hajnoczi 1006a1751b7SAlex Bligh struct AioContext { 101737e150eSPaolo Bonzini GSource source; 102737e150eSPaolo Bonzini 1037c690fd1SPaolo Bonzini /* Used by AioContext users to protect from multi-threaded access. */ 1043fe71223SPaolo Bonzini QemuRecMutex lock; 10598563fc3SStefan Hajnoczi 1067c690fd1SPaolo Bonzini /* The list of registered AIO handlers. Protected by ctx->list_lock. */ 1074749079cSStefan Hajnoczi AioHandlerList aio_handlers; 1084749079cSStefan Hajnoczi 1094749079cSStefan Hajnoczi /* The list of AIO handlers to be deleted. Protected by ctx->list_lock. */ 1104749079cSStefan Hajnoczi AioHandlerList deleted_aio_handlers; 111737e150eSPaolo Bonzini 112eabc9779SPaolo Bonzini /* Used to avoid unnecessary event_notifier_set calls in aio_notify; 113eabc9779SPaolo Bonzini * accessed with atomic primitives. If this field is 0, everything 114eabc9779SPaolo Bonzini * (file descriptors, bottom halves, timers) will be re-evaluated 115eabc9779SPaolo Bonzini * before the next blocking poll(), thus the event_notifier_set call 116eabc9779SPaolo Bonzini * can be skipped. If it is non-zero, you may need to wake up a 117eabc9779SPaolo Bonzini * concurrent aio_poll or the glib main event loop, making 118eabc9779SPaolo Bonzini * event_notifier_set necessary. 119eabc9779SPaolo Bonzini * 120eabc9779SPaolo Bonzini * Bit 0 is reserved for GSource usage of the AioContext, and is 1 12154a16a63SCao jin * between a call to aio_ctx_prepare and the next call to aio_ctx_check. 122eabc9779SPaolo Bonzini * Bits 1-31 simply count the number of active calls to aio_poll 123eabc9779SPaolo Bonzini * that are in the prepare or poll phase. 124eabc9779SPaolo Bonzini * 125eabc9779SPaolo Bonzini * The GSource and aio_poll must use a different mechanism because 126eabc9779SPaolo Bonzini * there is no certainty that a call to GSource's prepare callback 127eabc9779SPaolo Bonzini * (via g_main_context_prepare) is indeed followed by check and 128eabc9779SPaolo Bonzini * dispatch. It's not clear whether this would be a bug, but let's 129eabc9779SPaolo Bonzini * play safe and allow it---it will just cause extra calls to 130eabc9779SPaolo Bonzini * event_notifier_set until the next call to dispatch. 131eabc9779SPaolo Bonzini * 132eabc9779SPaolo Bonzini * Instead, the aio_poll calls include both the prepare and the 133eabc9779SPaolo Bonzini * dispatch phase, hence a simple counter is enough for them. 1340ceb849bSPaolo Bonzini */ 135eabc9779SPaolo Bonzini uint32_t notify_me; 1360ceb849bSPaolo Bonzini 1377c690fd1SPaolo Bonzini /* A lock to protect between QEMUBH and AioHandler adders and deleter, 1387c690fd1SPaolo Bonzini * and to ensure that no callbacks are removed while we're walking and 1397c690fd1SPaolo Bonzini * dispatching them. 140d7c99a12SPaolo Bonzini */ 141d7c99a12SPaolo Bonzini QemuLockCnt list_lock; 1420ceb849bSPaolo Bonzini 1438c6b0356SStefan Hajnoczi /* Bottom Halves pending aio_bh_poll() processing */ 1448c6b0356SStefan Hajnoczi BHList bh_list; 1458c6b0356SStefan Hajnoczi 1468c6b0356SStefan Hajnoczi /* Chained BH list slices for each nested aio_bh_poll() call */ 1478c6b0356SStefan Hajnoczi QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list; 148737e150eSPaolo Bonzini 14905e514b1SPaolo Bonzini /* Used by aio_notify. 15005e514b1SPaolo Bonzini * 15105e514b1SPaolo Bonzini * "notified" is used to avoid expensive event_notifier_test_and_clear 15205e514b1SPaolo Bonzini * calls. When it is clear, the EventNotifier is clear, or one thread 15305e514b1SPaolo Bonzini * is going to clear "notified" before processing more events. False 15405e514b1SPaolo Bonzini * positives are possible, i.e. "notified" could be set even though the 15505e514b1SPaolo Bonzini * EventNotifier is clear. 15605e514b1SPaolo Bonzini * 15705e514b1SPaolo Bonzini * Note that event_notifier_set *cannot* be optimized the same way. For 15805e514b1SPaolo Bonzini * more information on the problem that would result, see "#ifdef BUG2" 15905e514b1SPaolo Bonzini * in the docs/aio_notify_accept.promela formal model. 16005e514b1SPaolo Bonzini */ 16105e514b1SPaolo Bonzini bool notified; 162737e150eSPaolo Bonzini EventNotifier notifier; 1636b5f8762SStefan Hajnoczi 1640c330a73SPaolo Bonzini QSLIST_HEAD(, Coroutine) scheduled_coroutines; 1650c330a73SPaolo Bonzini QEMUBH *co_schedule_bh; 1660c330a73SPaolo Bonzini 1677c690fd1SPaolo Bonzini /* Thread pool for performing work and receiving completion callbacks. 1687c690fd1SPaolo Bonzini * Has its own locking. 1697c690fd1SPaolo Bonzini */ 1709b34277dSStefan Hajnoczi struct ThreadPool *thread_pool; 171dae21b98SAlex Bligh 1720187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO 1736663a0a3SAarushi Mehta /* 1746663a0a3SAarushi Mehta * State for native Linux AIO. Uses aio_context_acquire/release for 1750187f5c9SPaolo Bonzini * locking. 1760187f5c9SPaolo Bonzini */ 1770187f5c9SPaolo Bonzini struct LinuxAioState *linux_aio; 1780187f5c9SPaolo Bonzini #endif 1796663a0a3SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING 1806663a0a3SAarushi Mehta /* 1816663a0a3SAarushi Mehta * State for Linux io_uring. Uses aio_context_acquire/release for 1826663a0a3SAarushi Mehta * locking. 1836663a0a3SAarushi Mehta */ 1846663a0a3SAarushi Mehta struct LuringState *linux_io_uring; 1856663a0a3SAarushi Mehta #endif 1860187f5c9SPaolo Bonzini 1877c690fd1SPaolo Bonzini /* TimerLists for calling timers - one per clock type. Has its own 1887c690fd1SPaolo Bonzini * locking. 1897c690fd1SPaolo Bonzini */ 190dae21b98SAlex Bligh QEMUTimerListGroup tlg; 191c1e1e5faSFam Zheng 192c1e1e5faSFam Zheng int external_disable_cnt; 193fbe3fc5cSFam Zheng 1944a1cba38SStefan Hajnoczi /* Number of AioHandlers without .io_poll() */ 1954a1cba38SStefan Hajnoczi int poll_disable_cnt; 1964a1cba38SStefan Hajnoczi 19782a41186SStefan Hajnoczi /* Polling mode parameters */ 19882a41186SStefan Hajnoczi int64_t poll_ns; /* current polling time in nanoseconds */ 19982a41186SStefan Hajnoczi int64_t poll_max_ns; /* maximum polling time in nanoseconds */ 20082a41186SStefan Hajnoczi int64_t poll_grow; /* polling time growth factor */ 20182a41186SStefan Hajnoczi int64_t poll_shrink; /* polling time shrink factor */ 2024a1cba38SStefan Hajnoczi 203684e508cSStefan Hajnoczi /* Are we in polling mode or monitoring file descriptors? */ 204684e508cSStefan Hajnoczi bool poll_started; 205684e508cSStefan Hajnoczi 206fbe3fc5cSFam Zheng /* epoll(7) state used when built with CONFIG_EPOLL */ 207fbe3fc5cSFam Zheng int epollfd; 208*1f050a46SStefan Hajnoczi 209*1f050a46SStefan Hajnoczi const FDMonOps *fdmon_ops; 2106a1751b7SAlex Bligh }; 211737e150eSPaolo Bonzini 212737e150eSPaolo Bonzini /** 213737e150eSPaolo Bonzini * aio_context_new: Allocate a new AioContext. 214737e150eSPaolo Bonzini * 215737e150eSPaolo Bonzini * AioContext provide a mini event-loop that can be waited on synchronously. 216737e150eSPaolo Bonzini * They also provide bottom halves, a service to execute a piece of code 217737e150eSPaolo Bonzini * as soon as possible. 218737e150eSPaolo Bonzini */ 2192f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp); 220737e150eSPaolo Bonzini 221737e150eSPaolo Bonzini /** 222737e150eSPaolo Bonzini * aio_context_ref: 223737e150eSPaolo Bonzini * @ctx: The AioContext to operate on. 224737e150eSPaolo Bonzini * 225737e150eSPaolo Bonzini * Add a reference to an AioContext. 226737e150eSPaolo Bonzini */ 227737e150eSPaolo Bonzini void aio_context_ref(AioContext *ctx); 228737e150eSPaolo Bonzini 229737e150eSPaolo Bonzini /** 230737e150eSPaolo Bonzini * aio_context_unref: 231737e150eSPaolo Bonzini * @ctx: The AioContext to operate on. 232737e150eSPaolo Bonzini * 233737e150eSPaolo Bonzini * Drop a reference to an AioContext. 234737e150eSPaolo Bonzini */ 235737e150eSPaolo Bonzini void aio_context_unref(AioContext *ctx); 236737e150eSPaolo Bonzini 23798563fc3SStefan Hajnoczi /* Take ownership of the AioContext. If the AioContext will be shared between 23849110174SPaolo Bonzini * threads, and a thread does not want to be interrupted, it will have to 23949110174SPaolo Bonzini * take ownership around calls to aio_poll(). Otherwise, aio_poll() 24049110174SPaolo Bonzini * automatically takes care of calling aio_context_acquire and 24149110174SPaolo Bonzini * aio_context_release. 24298563fc3SStefan Hajnoczi * 2437c690fd1SPaolo Bonzini * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A 2447c690fd1SPaolo Bonzini * thread still has to call those to avoid being interrupted by the guest. 2457c690fd1SPaolo Bonzini * 2467c690fd1SPaolo Bonzini * Bottom halves, timers and callbacks can be created or removed without 2477c690fd1SPaolo Bonzini * acquiring the AioContext. 24898563fc3SStefan Hajnoczi */ 24998563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx); 25098563fc3SStefan Hajnoczi 25198563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */ 25298563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx); 25398563fc3SStefan Hajnoczi 254737e150eSPaolo Bonzini /** 2555b8bb359SPaolo Bonzini * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run 2565b8bb359SPaolo Bonzini * only once and as soon as possible. 2575b8bb359SPaolo Bonzini */ 2585b8bb359SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 2595b8bb359SPaolo Bonzini 2605b8bb359SPaolo Bonzini /** 261737e150eSPaolo Bonzini * aio_bh_new: Allocate a new bottom half structure. 262737e150eSPaolo Bonzini * 263737e150eSPaolo Bonzini * Bottom halves are lightweight callbacks whose invocation is guaranteed 264737e150eSPaolo Bonzini * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure 265737e150eSPaolo Bonzini * is opaque and must be allocated prior to its use. 266737e150eSPaolo Bonzini */ 267737e150eSPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); 268737e150eSPaolo Bonzini 269737e150eSPaolo Bonzini /** 270737e150eSPaolo Bonzini * aio_notify: Force processing of pending events. 271737e150eSPaolo Bonzini * 272737e150eSPaolo Bonzini * Similar to signaling a condition variable, aio_notify forces 273722f8d90SYaowei Bai * aio_poll to exit, so that the next call will re-examine pending events. 274722f8d90SYaowei Bai * The caller of aio_notify will usually call aio_poll again very soon, 275737e150eSPaolo Bonzini * or go through another iteration of the GLib main loop. Hence, aio_notify 276737e150eSPaolo Bonzini * also has the side effect of recalculating the sets of file descriptors 277737e150eSPaolo Bonzini * that the main loop waits for. 278737e150eSPaolo Bonzini * 279737e150eSPaolo Bonzini * Calling aio_notify is rarely necessary, because for example scheduling 280737e150eSPaolo Bonzini * a bottom half calls it already. 281737e150eSPaolo Bonzini */ 282737e150eSPaolo Bonzini void aio_notify(AioContext *ctx); 283737e150eSPaolo Bonzini 284737e150eSPaolo Bonzini /** 28505e514b1SPaolo Bonzini * aio_notify_accept: Acknowledge receiving an aio_notify. 28605e514b1SPaolo Bonzini * 28705e514b1SPaolo Bonzini * aio_notify() uses an EventNotifier in order to wake up a sleeping 28805e514b1SPaolo Bonzini * aio_poll() or g_main_context_iteration(). Calls to aio_notify() are 28905e514b1SPaolo Bonzini * usually rare, but the AioContext has to clear the EventNotifier on 29005e514b1SPaolo Bonzini * every aio_poll() or g_main_context_iteration() in order to avoid 29105e514b1SPaolo Bonzini * busy waiting. This event_notifier_test_and_clear() cannot be done 29205e514b1SPaolo Bonzini * using the usual aio_context_set_event_notifier(), because it must 29305e514b1SPaolo Bonzini * be done before processing all events (file descriptors, bottom halves, 29405e514b1SPaolo Bonzini * timers). 29505e514b1SPaolo Bonzini * 29605e514b1SPaolo Bonzini * aio_notify_accept() is an optimized event_notifier_test_and_clear() 29705e514b1SPaolo Bonzini * that is specific to an AioContext's notifier; it is used internally 29805e514b1SPaolo Bonzini * to clear the EventNotifier only if aio_notify() had been called. 29905e514b1SPaolo Bonzini */ 30005e514b1SPaolo Bonzini void aio_notify_accept(AioContext *ctx); 30105e514b1SPaolo Bonzini 30205e514b1SPaolo Bonzini /** 303df281b80SPavel Dovgalyuk * aio_bh_call: Executes callback function of the specified BH. 304df281b80SPavel Dovgalyuk */ 305df281b80SPavel Dovgalyuk void aio_bh_call(QEMUBH *bh); 306df281b80SPavel Dovgalyuk 307df281b80SPavel Dovgalyuk /** 308737e150eSPaolo Bonzini * aio_bh_poll: Poll bottom halves for an AioContext. 309737e150eSPaolo Bonzini * 310737e150eSPaolo Bonzini * These are internal functions used by the QEMU main loop. 311dcc772e2SLiu Ping Fan * And notice that multiple occurrences of aio_bh_poll cannot 312dcc772e2SLiu Ping Fan * be called concurrently 313737e150eSPaolo Bonzini */ 314737e150eSPaolo Bonzini int aio_bh_poll(AioContext *ctx); 315737e150eSPaolo Bonzini 316737e150eSPaolo Bonzini /** 317737e150eSPaolo Bonzini * qemu_bh_schedule: Schedule a bottom half. 318737e150eSPaolo Bonzini * 319737e150eSPaolo Bonzini * Scheduling a bottom half interrupts the main loop and causes the 320737e150eSPaolo Bonzini * execution of the callback that was passed to qemu_bh_new. 321737e150eSPaolo Bonzini * 322737e150eSPaolo Bonzini * Bottom halves that are scheduled from a bottom half handler are instantly 323737e150eSPaolo Bonzini * invoked. This can create an infinite loop if a bottom half handler 324737e150eSPaolo Bonzini * schedules itself. 325737e150eSPaolo Bonzini * 326737e150eSPaolo Bonzini * @bh: The bottom half to be scheduled. 327737e150eSPaolo Bonzini */ 328737e150eSPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh); 329737e150eSPaolo Bonzini 330737e150eSPaolo Bonzini /** 331737e150eSPaolo Bonzini * qemu_bh_cancel: Cancel execution of a bottom half. 332737e150eSPaolo Bonzini * 333737e150eSPaolo Bonzini * Canceling execution of a bottom half undoes the effect of calls to 334737e150eSPaolo Bonzini * qemu_bh_schedule without freeing its resources yet. While cancellation 335737e150eSPaolo Bonzini * itself is also wait-free and thread-safe, it can of course race with the 336737e150eSPaolo Bonzini * loop that executes bottom halves unless you are holding the iothread 337737e150eSPaolo Bonzini * mutex. This makes it mostly useless if you are not holding the mutex. 338737e150eSPaolo Bonzini * 339737e150eSPaolo Bonzini * @bh: The bottom half to be canceled. 340737e150eSPaolo Bonzini */ 341737e150eSPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh); 342737e150eSPaolo Bonzini 343737e150eSPaolo Bonzini /** 344737e150eSPaolo Bonzini *qemu_bh_delete: Cancel execution of a bottom half and free its resources. 345737e150eSPaolo Bonzini * 346737e150eSPaolo Bonzini * Deleting a bottom half frees the memory that was allocated for it by 347737e150eSPaolo Bonzini * qemu_bh_new. It also implies canceling the bottom half if it was 348737e150eSPaolo Bonzini * scheduled. 349dcc772e2SLiu Ping Fan * This func is async. The bottom half will do the delete action at the finial 350dcc772e2SLiu Ping Fan * end. 351737e150eSPaolo Bonzini * 352737e150eSPaolo Bonzini * @bh: The bottom half to be deleted. 353737e150eSPaolo Bonzini */ 354737e150eSPaolo Bonzini void qemu_bh_delete(QEMUBH *bh); 355737e150eSPaolo Bonzini 356737e150eSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 357a3462c65SPaolo Bonzini * attached to the AioContext, before g_poll is invoked. 358a3462c65SPaolo Bonzini * 359a3462c65SPaolo Bonzini * This is used internally in the implementation of the GSource. 360a3462c65SPaolo Bonzini */ 361a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx); 362a3462c65SPaolo Bonzini 363a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource 364a3462c65SPaolo Bonzini * attached to the AioContext, after g_poll is invoked. 365737e150eSPaolo Bonzini * 366737e150eSPaolo Bonzini * This is used internally in the implementation of the GSource. 367737e150eSPaolo Bonzini */ 368737e150eSPaolo Bonzini bool aio_pending(AioContext *ctx); 369737e150eSPaolo Bonzini 370e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext. 371e4c7e2d1SPaolo Bonzini * 372e4c7e2d1SPaolo Bonzini * This is used internally in the implementation of the GSource. 373e4c7e2d1SPaolo Bonzini */ 374a153bf52SPaolo Bonzini void aio_dispatch(AioContext *ctx); 375e4c7e2d1SPaolo Bonzini 376737e150eSPaolo Bonzini /* Progress in completing AIO work to occur. This can issue new pending 377737e150eSPaolo Bonzini * aio as a result of executing I/O completion or bh callbacks. 378737e150eSPaolo Bonzini * 379acfb23adSPaolo Bonzini * Return whether any progress was made by executing AIO or bottom half 380acfb23adSPaolo Bonzini * handlers. If @blocking == true, this should always be true except 381acfb23adSPaolo Bonzini * if someone called aio_notify. 382737e150eSPaolo Bonzini * 383737e150eSPaolo Bonzini * If there are no pending bottom halves, but there are pending AIO 384737e150eSPaolo Bonzini * operations, it may not be possible to make any progress without 385737e150eSPaolo Bonzini * blocking. If @blocking is true, this function will wait until one 386737e150eSPaolo Bonzini * or more AIO events have completed, to ensure something has moved 387737e150eSPaolo Bonzini * before returning. 388737e150eSPaolo Bonzini */ 389737e150eSPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking); 390737e150eSPaolo Bonzini 391737e150eSPaolo Bonzini /* Register a file descriptor and associated callbacks. Behaves very similarly 3926484e422SFam Zheng * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will 39387f68d31SPaolo Bonzini * be invoked when using aio_poll(). 394737e150eSPaolo Bonzini * 395737e150eSPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 396737e150eSPaolo Bonzini * instead of qemu_set_fd_handler[2]. 397737e150eSPaolo Bonzini */ 398737e150eSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx, 399737e150eSPaolo Bonzini int fd, 400dca21ef2SFam Zheng bool is_external, 401737e150eSPaolo Bonzini IOHandler *io_read, 402737e150eSPaolo Bonzini IOHandler *io_write, 403f6a51c84SStefan Hajnoczi AioPollFn *io_poll, 404737e150eSPaolo Bonzini void *opaque); 405737e150eSPaolo Bonzini 406684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for a file descriptor that has already been 407684e508cSStefan Hajnoczi * registered with aio_set_fd_handler. Do nothing if the file descriptor is 408684e508cSStefan Hajnoczi * not registered. 409684e508cSStefan Hajnoczi */ 410684e508cSStefan Hajnoczi void aio_set_fd_poll(AioContext *ctx, int fd, 411684e508cSStefan Hajnoczi IOHandler *io_poll_begin, 412684e508cSStefan Hajnoczi IOHandler *io_poll_end); 413684e508cSStefan Hajnoczi 414737e150eSPaolo Bonzini /* Register an event notifier and associated callbacks. Behaves very similarly 415737e150eSPaolo Bonzini * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks 41687f68d31SPaolo Bonzini * will be invoked when using aio_poll(). 417737e150eSPaolo Bonzini * 418737e150eSPaolo Bonzini * Code that invokes AIO completion functions should rely on this function 419737e150eSPaolo Bonzini * instead of event_notifier_set_handler. 420737e150eSPaolo Bonzini */ 421737e150eSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx, 422737e150eSPaolo Bonzini EventNotifier *notifier, 423dca21ef2SFam Zheng bool is_external, 424f6a51c84SStefan Hajnoczi EventNotifierHandler *io_read, 425f6a51c84SStefan Hajnoczi AioPollFn *io_poll); 426737e150eSPaolo Bonzini 427684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for an event notifier that has already been 428684e508cSStefan Hajnoczi * registered with aio_set_event_notifier. Do nothing if the event notifier is 429684e508cSStefan Hajnoczi * not registered. 430684e508cSStefan Hajnoczi */ 431684e508cSStefan Hajnoczi void aio_set_event_notifier_poll(AioContext *ctx, 432684e508cSStefan Hajnoczi EventNotifier *notifier, 433684e508cSStefan Hajnoczi EventNotifierHandler *io_poll_begin, 434684e508cSStefan Hajnoczi EventNotifierHandler *io_poll_end); 435684e508cSStefan Hajnoczi 436737e150eSPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached 437737e150eSPaolo Bonzini * to this AioContext. 438737e150eSPaolo Bonzini */ 439737e150eSPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx); 440737e150eSPaolo Bonzini 4419b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */ 4429b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx); 4439b34277dSStefan Hajnoczi 444ed6e2161SNishanth Aravamudan /* Setup the LinuxAioState bound to this AioContext */ 445ed6e2161SNishanth Aravamudan struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); 446ed6e2161SNishanth Aravamudan 4470187f5c9SPaolo Bonzini /* Return the LinuxAioState bound to this AioContext */ 4480187f5c9SPaolo Bonzini struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); 4490187f5c9SPaolo Bonzini 4506663a0a3SAarushi Mehta /* Setup the LuringState bound to this AioContext */ 4516663a0a3SAarushi Mehta struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); 4526663a0a3SAarushi Mehta 4536663a0a3SAarushi Mehta /* Return the LuringState bound to this AioContext */ 4546663a0a3SAarushi Mehta struct LuringState *aio_get_linux_io_uring(AioContext *ctx); 4554e29e831SAlex Bligh /** 45689a603a0SArtem Pisarenko * aio_timer_new_with_attrs: 45789a603a0SArtem Pisarenko * @ctx: the aio context 45889a603a0SArtem Pisarenko * @type: the clock type 45989a603a0SArtem Pisarenko * @scale: the scale 46089a603a0SArtem Pisarenko * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values 46189a603a0SArtem Pisarenko * to assign 46289a603a0SArtem Pisarenko * @cb: the callback to call on timer expiry 46389a603a0SArtem Pisarenko * @opaque: the opaque pointer to pass to the callback 46489a603a0SArtem Pisarenko * 46589a603a0SArtem Pisarenko * Allocate a new timer (with attributes) attached to the context @ctx. 46689a603a0SArtem Pisarenko * The function is responsible for memory allocation. 46789a603a0SArtem Pisarenko * 46889a603a0SArtem Pisarenko * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. 46989a603a0SArtem Pisarenko * Use that unless you really need dynamic memory allocation. 47089a603a0SArtem Pisarenko * 47189a603a0SArtem Pisarenko * Returns: a pointer to the new timer 47289a603a0SArtem Pisarenko */ 47389a603a0SArtem Pisarenko static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, 47489a603a0SArtem Pisarenko QEMUClockType type, 47589a603a0SArtem Pisarenko int scale, int attributes, 47689a603a0SArtem Pisarenko QEMUTimerCB *cb, void *opaque) 47789a603a0SArtem Pisarenko { 47889a603a0SArtem Pisarenko return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); 47989a603a0SArtem Pisarenko } 48089a603a0SArtem Pisarenko 48189a603a0SArtem Pisarenko /** 4824e29e831SAlex Bligh * aio_timer_new: 4834e29e831SAlex Bligh * @ctx: the aio context 4844e29e831SAlex Bligh * @type: the clock type 4854e29e831SAlex Bligh * @scale: the scale 4864e29e831SAlex Bligh * @cb: the callback to call on timer expiry 4874e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 4884e29e831SAlex Bligh * 4894e29e831SAlex Bligh * Allocate a new timer attached to the context @ctx. 49089a603a0SArtem Pisarenko * See aio_timer_new_with_attrs for details. 4914e29e831SAlex Bligh * 4924e29e831SAlex Bligh * Returns: a pointer to the new timer 4934e29e831SAlex Bligh */ 4944e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, 4954e29e831SAlex Bligh int scale, 4964e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 4974e29e831SAlex Bligh { 49889a603a0SArtem Pisarenko return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); 49989a603a0SArtem Pisarenko } 50089a603a0SArtem Pisarenko 50189a603a0SArtem Pisarenko /** 50289a603a0SArtem Pisarenko * aio_timer_init_with_attrs: 50389a603a0SArtem Pisarenko * @ctx: the aio context 50489a603a0SArtem Pisarenko * @ts: the timer 50589a603a0SArtem Pisarenko * @type: the clock type 50689a603a0SArtem Pisarenko * @scale: the scale 50789a603a0SArtem Pisarenko * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values 50889a603a0SArtem Pisarenko * to assign 50989a603a0SArtem Pisarenko * @cb: the callback to call on timer expiry 51089a603a0SArtem Pisarenko * @opaque: the opaque pointer to pass to the callback 51189a603a0SArtem Pisarenko * 51289a603a0SArtem Pisarenko * Initialise a new timer (with attributes) attached to the context @ctx. 51389a603a0SArtem Pisarenko * The caller is responsible for memory allocation. 51489a603a0SArtem Pisarenko */ 51589a603a0SArtem Pisarenko static inline void aio_timer_init_with_attrs(AioContext *ctx, 51689a603a0SArtem Pisarenko QEMUTimer *ts, QEMUClockType type, 51789a603a0SArtem Pisarenko int scale, int attributes, 51889a603a0SArtem Pisarenko QEMUTimerCB *cb, void *opaque) 51989a603a0SArtem Pisarenko { 52089a603a0SArtem Pisarenko timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); 5214e29e831SAlex Bligh } 5224e29e831SAlex Bligh 5234e29e831SAlex Bligh /** 5244e29e831SAlex Bligh * aio_timer_init: 5254e29e831SAlex Bligh * @ctx: the aio context 5264e29e831SAlex Bligh * @ts: the timer 5274e29e831SAlex Bligh * @type: the clock type 5284e29e831SAlex Bligh * @scale: the scale 5294e29e831SAlex Bligh * @cb: the callback to call on timer expiry 5304e29e831SAlex Bligh * @opaque: the opaque pointer to pass to the callback 5314e29e831SAlex Bligh * 5324e29e831SAlex Bligh * Initialise a new timer attached to the context @ctx. 53389a603a0SArtem Pisarenko * See aio_timer_init_with_attrs for details. 5344e29e831SAlex Bligh */ 5354e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx, 5364e29e831SAlex Bligh QEMUTimer *ts, QEMUClockType type, 5374e29e831SAlex Bligh int scale, 5384e29e831SAlex Bligh QEMUTimerCB *cb, void *opaque) 5394e29e831SAlex Bligh { 54089a603a0SArtem Pisarenko timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); 5414e29e831SAlex Bligh } 5424e29e831SAlex Bligh 543845ca10dSPaolo Bonzini /** 544845ca10dSPaolo Bonzini * aio_compute_timeout: 545845ca10dSPaolo Bonzini * @ctx: the aio context 546845ca10dSPaolo Bonzini * 547845ca10dSPaolo Bonzini * Compute the timeout that a blocking aio_poll should use. 548845ca10dSPaolo Bonzini */ 549845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx); 550845ca10dSPaolo Bonzini 551c1e1e5faSFam Zheng /** 552c1e1e5faSFam Zheng * aio_disable_external: 553c1e1e5faSFam Zheng * @ctx: the aio context 554c1e1e5faSFam Zheng * 555c1e1e5faSFam Zheng * Disable the further processing of external clients. 556c1e1e5faSFam Zheng */ 557c1e1e5faSFam Zheng static inline void aio_disable_external(AioContext *ctx) 558c1e1e5faSFam Zheng { 559c1e1e5faSFam Zheng atomic_inc(&ctx->external_disable_cnt); 560c1e1e5faSFam Zheng } 561c1e1e5faSFam Zheng 562c1e1e5faSFam Zheng /** 563c1e1e5faSFam Zheng * aio_enable_external: 564c1e1e5faSFam Zheng * @ctx: the aio context 565c1e1e5faSFam Zheng * 566c1e1e5faSFam Zheng * Enable the processing of external clients. 567c1e1e5faSFam Zheng */ 568c1e1e5faSFam Zheng static inline void aio_enable_external(AioContext *ctx) 569c1e1e5faSFam Zheng { 570321d1dbaSStefan Hajnoczi int old; 571321d1dbaSStefan Hajnoczi 572321d1dbaSStefan Hajnoczi old = atomic_fetch_dec(&ctx->external_disable_cnt); 573321d1dbaSStefan Hajnoczi assert(old > 0); 574321d1dbaSStefan Hajnoczi if (old == 1) { 575321d1dbaSStefan Hajnoczi /* Kick event loop so it re-arms file descriptors */ 576321d1dbaSStefan Hajnoczi aio_notify(ctx); 577321d1dbaSStefan Hajnoczi } 578c1e1e5faSFam Zheng } 579c1e1e5faSFam Zheng 580c1e1e5faSFam Zheng /** 5815ceb9e39SFam Zheng * aio_external_disabled: 5825ceb9e39SFam Zheng * @ctx: the aio context 5835ceb9e39SFam Zheng * 5845ceb9e39SFam Zheng * Return true if the external clients are disabled. 5855ceb9e39SFam Zheng */ 5865ceb9e39SFam Zheng static inline bool aio_external_disabled(AioContext *ctx) 5875ceb9e39SFam Zheng { 5885ceb9e39SFam Zheng return atomic_read(&ctx->external_disable_cnt); 5895ceb9e39SFam Zheng } 5905ceb9e39SFam Zheng 5915ceb9e39SFam Zheng /** 592c1e1e5faSFam Zheng * aio_node_check: 593c1e1e5faSFam Zheng * @ctx: the aio context 594c1e1e5faSFam Zheng * @is_external: Whether or not the checked node is an external event source. 595c1e1e5faSFam Zheng * 596c1e1e5faSFam Zheng * Check if the node's is_external flag is okay to be polled by the ctx at this 597c1e1e5faSFam Zheng * moment. True means green light. 598c1e1e5faSFam Zheng */ 599c1e1e5faSFam Zheng static inline bool aio_node_check(AioContext *ctx, bool is_external) 600c1e1e5faSFam Zheng { 601c1e1e5faSFam Zheng return !is_external || !atomic_read(&ctx->external_disable_cnt); 602c1e1e5faSFam Zheng } 603c1e1e5faSFam Zheng 60437fcee5dSFam Zheng /** 6050c330a73SPaolo Bonzini * aio_co_schedule: 6060c330a73SPaolo Bonzini * @ctx: the aio context 6070c330a73SPaolo Bonzini * @co: the coroutine 6080c330a73SPaolo Bonzini * 6090c330a73SPaolo Bonzini * Start a coroutine on a remote AioContext. 6100c330a73SPaolo Bonzini * 6110c330a73SPaolo Bonzini * The coroutine must not be entered by anyone else while aio_co_schedule() 6120c330a73SPaolo Bonzini * is active. In addition the coroutine must have yielded unless ctx 6130c330a73SPaolo Bonzini * is the context in which the coroutine is running (i.e. the value of 6140c330a73SPaolo Bonzini * qemu_get_current_aio_context() from the coroutine itself). 6150c330a73SPaolo Bonzini */ 6160c330a73SPaolo Bonzini void aio_co_schedule(AioContext *ctx, struct Coroutine *co); 6170c330a73SPaolo Bonzini 6180c330a73SPaolo Bonzini /** 6190c330a73SPaolo Bonzini * aio_co_wake: 6200c330a73SPaolo Bonzini * @co: the coroutine 6210c330a73SPaolo Bonzini * 6220c330a73SPaolo Bonzini * Restart a coroutine on the AioContext where it was running last, thus 6230c330a73SPaolo Bonzini * preventing coroutines from jumping from one context to another when they 6240c330a73SPaolo Bonzini * go to sleep. 6250c330a73SPaolo Bonzini * 6260c330a73SPaolo Bonzini * aio_co_wake may be executed either in coroutine or non-coroutine 6270c330a73SPaolo Bonzini * context. The coroutine must not be entered by anyone else while 6280c330a73SPaolo Bonzini * aio_co_wake() is active. 6290c330a73SPaolo Bonzini */ 6300c330a73SPaolo Bonzini void aio_co_wake(struct Coroutine *co); 6310c330a73SPaolo Bonzini 6320c330a73SPaolo Bonzini /** 6338865852eSFam Zheng * aio_co_enter: 6348865852eSFam Zheng * @ctx: the context to run the coroutine 6358865852eSFam Zheng * @co: the coroutine to run 6368865852eSFam Zheng * 6378865852eSFam Zheng * Enter a coroutine in the specified AioContext. 6388865852eSFam Zheng */ 6398865852eSFam Zheng void aio_co_enter(AioContext *ctx, struct Coroutine *co); 6408865852eSFam Zheng 6418865852eSFam Zheng /** 642e4370165SPaolo Bonzini * Return the AioContext whose event loop runs in the current thread. 643e4370165SPaolo Bonzini * 644e4370165SPaolo Bonzini * If called from an IOThread this will be the IOThread's AioContext. If 645e4370165SPaolo Bonzini * called from another thread it will be the main loop AioContext. 646e4370165SPaolo Bonzini */ 647e4370165SPaolo Bonzini AioContext *qemu_get_current_aio_context(void); 648e4370165SPaolo Bonzini 649e4370165SPaolo Bonzini /** 650d2b63ba8SStefan Hajnoczi * in_aio_context_home_thread: 651e4370165SPaolo Bonzini * @ctx: the aio context 652e4370165SPaolo Bonzini * 653d2b63ba8SStefan Hajnoczi * Return whether we are running in the thread that normally runs @ctx. Note 654d2b63ba8SStefan Hajnoczi * that acquiring/releasing ctx does not affect the outcome, each AioContext 655d2b63ba8SStefan Hajnoczi * still only has one home thread that is responsible for running it. 656e4370165SPaolo Bonzini */ 657d2b63ba8SStefan Hajnoczi static inline bool in_aio_context_home_thread(AioContext *ctx) 658e4370165SPaolo Bonzini { 659e4370165SPaolo Bonzini return ctx == qemu_get_current_aio_context(); 660e4370165SPaolo Bonzini } 661e4370165SPaolo Bonzini 662e4370165SPaolo Bonzini /** 66337fcee5dSFam Zheng * aio_context_setup: 66437fcee5dSFam Zheng * @ctx: the aio context 66537fcee5dSFam Zheng * 66637fcee5dSFam Zheng * Initialize the aio context. 66737fcee5dSFam Zheng */ 6687e003465SCao jin void aio_context_setup(AioContext *ctx); 66937fcee5dSFam Zheng 6704a1cba38SStefan Hajnoczi /** 671cd0a6d2bSJie Wang * aio_context_destroy: 672cd0a6d2bSJie Wang * @ctx: the aio context 673cd0a6d2bSJie Wang * 674cd0a6d2bSJie Wang * Destroy the aio context. 675cd0a6d2bSJie Wang */ 676cd0a6d2bSJie Wang void aio_context_destroy(AioContext *ctx); 677cd0a6d2bSJie Wang 678cd0a6d2bSJie Wang /** 6794a1cba38SStefan Hajnoczi * aio_context_set_poll_params: 6804a1cba38SStefan Hajnoczi * @ctx: the aio context 6814a1cba38SStefan Hajnoczi * @max_ns: how long to busy poll for, in nanoseconds 68282a41186SStefan Hajnoczi * @grow: polling time growth factor 68382a41186SStefan Hajnoczi * @shrink: polling time shrink factor 6844a1cba38SStefan Hajnoczi * 6854a1cba38SStefan Hajnoczi * Poll mode can be disabled by setting poll_max_ns to 0. 6864a1cba38SStefan Hajnoczi */ 6874a1cba38SStefan Hajnoczi void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, 68882a41186SStefan Hajnoczi int64_t grow, int64_t shrink, 6894a1cba38SStefan Hajnoczi Error **errp); 6904a1cba38SStefan Hajnoczi 691737e150eSPaolo Bonzini #endif 692