xref: /openbmc/qemu/include/block/aio.h (revision 1f050a4690f62a1e7dabc4f44141e9f762c3769f)
1737e150eSPaolo Bonzini /*
2737e150eSPaolo Bonzini  * QEMU aio implementation
3737e150eSPaolo Bonzini  *
4737e150eSPaolo Bonzini  * Copyright IBM, Corp. 2008
5737e150eSPaolo Bonzini  *
6737e150eSPaolo Bonzini  * Authors:
7737e150eSPaolo Bonzini  *  Anthony Liguori   <aliguori@us.ibm.com>
8737e150eSPaolo Bonzini  *
9737e150eSPaolo Bonzini  * This work is licensed under the terms of the GNU GPL, version 2.  See
10737e150eSPaolo Bonzini  * the COPYING file in the top-level directory.
11737e150eSPaolo Bonzini  *
12737e150eSPaolo Bonzini  */
13737e150eSPaolo Bonzini 
14737e150eSPaolo Bonzini #ifndef QEMU_AIO_H
15737e150eSPaolo Bonzini #define QEMU_AIO_H
16737e150eSPaolo Bonzini 
171de7afc9SPaolo Bonzini #include "qemu/queue.h"
181de7afc9SPaolo Bonzini #include "qemu/event_notifier.h"
19dcc772e2SLiu Ping Fan #include "qemu/thread.h"
20dae21b98SAlex Bligh #include "qemu/timer.h"
21737e150eSPaolo Bonzini 
227c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB;
23097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret);
24737e150eSPaolo Bonzini 
/*
 * Per-request-type description: two callbacks plus a size.  Each BlockAIOCB
 * points to one of these through its aiocb_info field.
 */
25737e150eSPaolo Bonzini typedef struct AIOCBInfo {
    /* NOTE(review): presumably requests asynchronous cancellation of @acb - confirm */
267c84b1b8SMarkus Armbruster     void (*cancel_async)(BlockAIOCB *acb);
    /* Returns an AioContext for @acb */
277c84b1b8SMarkus Armbruster     AioContext *(*get_aio_context)(BlockAIOCB *acb);
    /* NOTE(review): presumably the allocation size used by qemu_aio_get() - confirm */
28737e150eSPaolo Bonzini     size_t aiocb_size;
29737e150eSPaolo Bonzini } AIOCBInfo;
30737e150eSPaolo Bonzini 
/*
 * State carried by an asynchronous block request.  The fields mirror the
 * arguments of qemu_aio_get() (aiocb_info, bs, cb, opaque).
 */
317c84b1b8SMarkus Armbruster struct BlockAIOCB {
    /* Callback table and size for this kind of request */
32737e150eSPaolo Bonzini     const AIOCBInfo *aiocb_info;
33737e150eSPaolo Bonzini     BlockDriverState *bs;
    /* Completion callback; a BlockCompletionFunc takes (opaque, ret) */
34097310b5SMarkus Armbruster     BlockCompletionFunc *cb;
    /* NOTE(review): presumably handed to cb as its opaque argument - confirm */
35737e150eSPaolo Bonzini     void *opaque;
    /* NOTE(review): presumably adjusted by qemu_aio_ref()/qemu_aio_unref() - confirm */
36f197fe2bSFam Zheng     int refcnt;
37737e150eSPaolo Bonzini };
38737e150eSPaolo Bonzini 
39737e150eSPaolo Bonzini void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
40097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque);
418007429aSFam Zheng void qemu_aio_unref(void *p);
42f197fe2bSFam Zheng void qemu_aio_ref(void *p);
43737e150eSPaolo Bonzini 
44737e150eSPaolo Bonzini typedef struct AioHandler AioHandler;
454749079cSStefan Hajnoczi typedef QLIST_HEAD(, AioHandler) AioHandlerList;
46737e150eSPaolo Bonzini typedef void QEMUBHFunc(void *opaque);
47f6a51c84SStefan Hajnoczi typedef bool AioPollFn(void *opaque);
48737e150eSPaolo Bonzini typedef void IOHandler(void *opaque);
49737e150eSPaolo Bonzini 
500c330a73SPaolo Bonzini struct Coroutine;
510187f5c9SPaolo Bonzini struct ThreadPool;
520187f5c9SPaolo Bonzini struct LinuxAioState;
536663a0a3SAarushi Mehta struct LuringState;
540187f5c9SPaolo Bonzini 
55*1f050a46SStefan Hajnoczi /* Callbacks for file descriptor monitoring implementations */
56*1f050a46SStefan Hajnoczi typedef struct {
57*1f050a46SStefan Hajnoczi     /*
58*1f050a46SStefan Hajnoczi      * update:
59*1f050a46SStefan Hajnoczi      * @ctx: the AioContext
60*1f050a46SStefan Hajnoczi      * @node: the handler
61*1f050a46SStefan Hajnoczi      * @is_new: true if the file descriptor is not being monitored yet
62*1f050a46SStefan Hajnoczi      *
63*1f050a46SStefan Hajnoczi      * Add/remove/modify a monitored file descriptor.  There are three cases:
64*1f050a46SStefan Hajnoczi      * 1. node->pfd.events == 0 means remove the file descriptor.
65*1f050a46SStefan Hajnoczi      * 2. !is_new means modify an already monitored file descriptor.
66*1f050a46SStefan Hajnoczi      * 3. is_new means add a new file descriptor.
67*1f050a46SStefan Hajnoczi      *
68*1f050a46SStefan Hajnoczi      * Called with ctx->list_lock acquired.
69*1f050a46SStefan Hajnoczi      */
70*1f050a46SStefan Hajnoczi     void (*update)(AioContext *ctx, AioHandler *node, bool is_new);
71*1f050a46SStefan Hajnoczi 
72*1f050a46SStefan Hajnoczi     /*
73*1f050a46SStefan Hajnoczi      * wait:
74*1f050a46SStefan Hajnoczi      * @ctx: the AioContext
75*1f050a46SStefan Hajnoczi      * @ready_list: list for handlers that become ready
76*1f050a46SStefan Hajnoczi      * @timeout: maximum duration to wait, in nanoseconds
77*1f050a46SStefan Hajnoczi      *
78*1f050a46SStefan Hajnoczi      * Wait for file descriptors to become ready and place them on ready_list.
79*1f050a46SStefan Hajnoczi      *
80*1f050a46SStefan Hajnoczi      * Called with ctx->list_lock incremented but not locked.
81*1f050a46SStefan Hajnoczi      *
82*1f050a46SStefan Hajnoczi      * Returns: number of ready file descriptors.
83*1f050a46SStefan Hajnoczi      */
84*1f050a46SStefan Hajnoczi     int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);
85*1f050a46SStefan Hajnoczi } FDMonOps;
86*1f050a46SStefan Hajnoczi 
878c6b0356SStefan Hajnoczi /*
888c6b0356SStefan Hajnoczi  * Each aio_bh_poll() call carves off a slice of the BH list, so that newly
898c6b0356SStefan Hajnoczi  * scheduled BHs are not processed until the next aio_bh_poll() call.  All
908c6b0356SStefan Hajnoczi  * active aio_bh_poll() calls chain their slices together in a list, so that
918c6b0356SStefan Hajnoczi  * nested aio_bh_poll() calls process all scheduled bottom halves.
928c6b0356SStefan Hajnoczi  */
938c6b0356SStefan Hajnoczi typedef QSLIST_HEAD(, QEMUBH) BHList;
948c6b0356SStefan Hajnoczi typedef struct BHListSlice BHListSlice;
958c6b0356SStefan Hajnoczi struct BHListSlice {
    /* The bottom halves carved off for one aio_bh_poll() call */
968c6b0356SStefan Hajnoczi     BHList bh_list;
    /* Link in the chain of slices (see AioContext.bh_slice_list) */
978c6b0356SStefan Hajnoczi     QSIMPLEQ_ENTRY(BHListSlice) next;
988c6b0356SStefan Hajnoczi };
998c6b0356SStefan Hajnoczi 
1006a1751b7SAlex Bligh struct AioContext {
    /*
     * GSource embedded as the first member.
     * NOTE(review): presumably the GSource handed out by aio_get_g_source()
     * - confirm.
     */
101737e150eSPaolo Bonzini     GSource source;
102737e150eSPaolo Bonzini 
1037c690fd1SPaolo Bonzini     /* Used by AioContext users to protect from multi-threaded access.  */
1043fe71223SPaolo Bonzini     QemuRecMutex lock;
10598563fc3SStefan Hajnoczi 
1067c690fd1SPaolo Bonzini     /* The list of registered AIO handlers.  Protected by ctx->list_lock. */
1074749079cSStefan Hajnoczi     AioHandlerList aio_handlers;
1084749079cSStefan Hajnoczi 
1094749079cSStefan Hajnoczi     /* The list of AIO handlers to be deleted.  Protected by ctx->list_lock. */
1104749079cSStefan Hajnoczi     AioHandlerList deleted_aio_handlers;
111737e150eSPaolo Bonzini 
112eabc9779SPaolo Bonzini     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
113eabc9779SPaolo Bonzini      * accessed with atomic primitives.  If this field is 0, everything
114eabc9779SPaolo Bonzini      * (file descriptors, bottom halves, timers) will be re-evaluated
115eabc9779SPaolo Bonzini      * before the next blocking poll(), thus the event_notifier_set call
116eabc9779SPaolo Bonzini      * can be skipped.  If it is non-zero, you may need to wake up a
117eabc9779SPaolo Bonzini      * concurrent aio_poll or the glib main event loop, making
118eabc9779SPaolo Bonzini      * event_notifier_set necessary.
119eabc9779SPaolo Bonzini      *
120eabc9779SPaolo Bonzini      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
12154a16a63SCao jin      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
122eabc9779SPaolo Bonzini      * Bits 1-31 simply count the number of active calls to aio_poll
123eabc9779SPaolo Bonzini      * that are in the prepare or poll phase.
124eabc9779SPaolo Bonzini      *
125eabc9779SPaolo Bonzini      * The GSource and aio_poll must use a different mechanism because
126eabc9779SPaolo Bonzini      * there is no certainty that a call to GSource's prepare callback
127eabc9779SPaolo Bonzini      * (via g_main_context_prepare) is indeed followed by check and
128eabc9779SPaolo Bonzini      * dispatch.  It's not clear whether this would be a bug, but let's
129eabc9779SPaolo Bonzini      * play safe and allow it---it will just cause extra calls to
130eabc9779SPaolo Bonzini      * event_notifier_set until the next call to dispatch.
131eabc9779SPaolo Bonzini      *
132eabc9779SPaolo Bonzini      * Instead, the aio_poll calls include both the prepare and the
133eabc9779SPaolo Bonzini      * dispatch phase, hence a simple counter is enough for them.
1340ceb849bSPaolo Bonzini      */
135eabc9779SPaolo Bonzini     uint32_t notify_me;
1360ceb849bSPaolo Bonzini 
1377c690fd1SPaolo Bonzini     /* A lock to protect between QEMUBH and AioHandler adders and deleter,
1387c690fd1SPaolo Bonzini      * and to ensure that no callbacks are removed while we're walking and
1397c690fd1SPaolo Bonzini      * dispatching them.
140d7c99a12SPaolo Bonzini      */
141d7c99a12SPaolo Bonzini     QemuLockCnt list_lock;
1420ceb849bSPaolo Bonzini 
1438c6b0356SStefan Hajnoczi     /* Bottom Halves pending aio_bh_poll() processing */
1448c6b0356SStefan Hajnoczi     BHList bh_list;
1458c6b0356SStefan Hajnoczi 
1468c6b0356SStefan Hajnoczi     /* Chained BH list slices for each nested aio_bh_poll() call */
1478c6b0356SStefan Hajnoczi     QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list;
148737e150eSPaolo Bonzini 
14905e514b1SPaolo Bonzini     /* Used by aio_notify.
15005e514b1SPaolo Bonzini      *
15105e514b1SPaolo Bonzini      * "notified" is used to avoid expensive event_notifier_test_and_clear
15205e514b1SPaolo Bonzini      * calls.  When it is clear, the EventNotifier is clear, or one thread
15305e514b1SPaolo Bonzini      * is going to clear "notified" before processing more events.  False
15405e514b1SPaolo Bonzini      * positives are possible, i.e. "notified" could be set even though the
15505e514b1SPaolo Bonzini      * EventNotifier is clear.
15605e514b1SPaolo Bonzini      *
15705e514b1SPaolo Bonzini      * Note that event_notifier_set *cannot* be optimized the same way.  For
15805e514b1SPaolo Bonzini      * more information on the problem that would result, see "#ifdef BUG2"
15905e514b1SPaolo Bonzini      * in the docs/aio_notify_accept.promela formal model.
16005e514b1SPaolo Bonzini      */
16105e514b1SPaolo Bonzini     bool notified;
162737e150eSPaolo Bonzini     EventNotifier notifier;
1636b5f8762SStefan Hajnoczi 
    /*
     * NOTE(review): presumably the coroutines queued to run in this context,
     * and the bottom half that runs them - confirm against the implementation.
     */
1640c330a73SPaolo Bonzini     QSLIST_HEAD(, Coroutine) scheduled_coroutines;
1650c330a73SPaolo Bonzini     QEMUBH *co_schedule_bh;
1660c330a73SPaolo Bonzini 
1677c690fd1SPaolo Bonzini     /* Thread pool for performing work and receiving completion callbacks.
1687c690fd1SPaolo Bonzini      * Has its own locking.
1697c690fd1SPaolo Bonzini      */
1709b34277dSStefan Hajnoczi     struct ThreadPool *thread_pool;
171dae21b98SAlex Bligh 
1720187f5c9SPaolo Bonzini #ifdef CONFIG_LINUX_AIO
1736663a0a3SAarushi Mehta     /*
1746663a0a3SAarushi Mehta      * State for native Linux AIO.  Uses aio_context_acquire/release for
1750187f5c9SPaolo Bonzini      * locking.
1760187f5c9SPaolo Bonzini      */
1770187f5c9SPaolo Bonzini     struct LinuxAioState *linux_aio;
1780187f5c9SPaolo Bonzini #endif
1796663a0a3SAarushi Mehta #ifdef CONFIG_LINUX_IO_URING
1806663a0a3SAarushi Mehta     /*
1816663a0a3SAarushi Mehta      * State for Linux io_uring.  Uses aio_context_acquire/release for
1826663a0a3SAarushi Mehta      * locking.
1836663a0a3SAarushi Mehta      */
1846663a0a3SAarushi Mehta     struct LuringState *linux_io_uring;
1856663a0a3SAarushi Mehta #endif
1860187f5c9SPaolo Bonzini 
1877c690fd1SPaolo Bonzini     /* TimerLists for calling timers - one per clock type.  Has its own
1887c690fd1SPaolo Bonzini      * locking.
1897c690fd1SPaolo Bonzini      */
190dae21b98SAlex Bligh     QEMUTimerListGroup tlg;
191c1e1e5faSFam Zheng 
    /* Counter incremented with atomic_inc() by aio_disable_external() */
192c1e1e5faSFam Zheng     int external_disable_cnt;
193fbe3fc5cSFam Zheng 
1944a1cba38SStefan Hajnoczi     /* Number of AioHandlers without .io_poll() */
1954a1cba38SStefan Hajnoczi     int poll_disable_cnt;
1964a1cba38SStefan Hajnoczi 
19782a41186SStefan Hajnoczi     /* Polling mode parameters */
19882a41186SStefan Hajnoczi     int64_t poll_ns;        /* current polling time in nanoseconds */
19982a41186SStefan Hajnoczi     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
20082a41186SStefan Hajnoczi     int64_t poll_grow;      /* polling time growth factor */
20182a41186SStefan Hajnoczi     int64_t poll_shrink;    /* polling time shrink factor */
2024a1cba38SStefan Hajnoczi 
203684e508cSStefan Hajnoczi     /* Are we in polling mode or monitoring file descriptors? */
204684e508cSStefan Hajnoczi     bool poll_started;
205684e508cSStefan Hajnoczi 
206fbe3fc5cSFam Zheng     /* epoll(7) state used when built with CONFIG_EPOLL */
207fbe3fc5cSFam Zheng     int epollfd;
208*1f050a46SStefan Hajnoczi 
    /* File descriptor monitoring callbacks (see FDMonOps) */
209*1f050a46SStefan Hajnoczi     const FDMonOps *fdmon_ops;
2106a1751b7SAlex Bligh };
211737e150eSPaolo Bonzini 
212737e150eSPaolo Bonzini /**
213737e150eSPaolo Bonzini  * aio_context_new: Allocate a new AioContext.
214737e150eSPaolo Bonzini  *
215737e150eSPaolo Bonzini  * AioContexts provide a mini event-loop that can be waited on synchronously.
216737e150eSPaolo Bonzini  * They also provide bottom halves, a service to execute a piece of code
217737e150eSPaolo Bonzini  * as soon as possible.
218737e150eSPaolo Bonzini  */
2192f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp);
220737e150eSPaolo Bonzini 
221737e150eSPaolo Bonzini /**
222737e150eSPaolo Bonzini  * aio_context_ref:
223737e150eSPaolo Bonzini  * @ctx: The AioContext to operate on.
224737e150eSPaolo Bonzini  *
225737e150eSPaolo Bonzini  * Add a reference to an AioContext.
226737e150eSPaolo Bonzini  */
227737e150eSPaolo Bonzini void aio_context_ref(AioContext *ctx);
228737e150eSPaolo Bonzini 
229737e150eSPaolo Bonzini /**
230737e150eSPaolo Bonzini  * aio_context_unref:
231737e150eSPaolo Bonzini  * @ctx: The AioContext to operate on.
232737e150eSPaolo Bonzini  *
233737e150eSPaolo Bonzini  * Drop a reference to an AioContext.
234737e150eSPaolo Bonzini  */
235737e150eSPaolo Bonzini void aio_context_unref(AioContext *ctx);
236737e150eSPaolo Bonzini 
23798563fc3SStefan Hajnoczi /* Take ownership of the AioContext.  If the AioContext will be shared between
23849110174SPaolo Bonzini  * threads, and a thread does not want to be interrupted, it will have to
23949110174SPaolo Bonzini  * take ownership around calls to aio_poll().  Otherwise, aio_poll()
24049110174SPaolo Bonzini  * automatically takes care of calling aio_context_acquire and
24149110174SPaolo Bonzini  * aio_context_release.
24298563fc3SStefan Hajnoczi  *
2437c690fd1SPaolo Bonzini  * Note that this is separate from bdrv_drained_begin/bdrv_drained_end.  A
2447c690fd1SPaolo Bonzini  * thread still has to call those to avoid being interrupted by the guest.
2457c690fd1SPaolo Bonzini  *
2467c690fd1SPaolo Bonzini  * Bottom halves, timers and callbacks can be created or removed without
2477c690fd1SPaolo Bonzini  * acquiring the AioContext.
24898563fc3SStefan Hajnoczi  */
24998563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx);
25098563fc3SStefan Hajnoczi 
25198563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */
25298563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx);
25398563fc3SStefan Hajnoczi 
254737e150eSPaolo Bonzini /**
2555b8bb359SPaolo Bonzini  * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
2565b8bb359SPaolo Bonzini  * only once and as soon as possible.
2575b8bb359SPaolo Bonzini  */
2585b8bb359SPaolo Bonzini void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
2595b8bb359SPaolo Bonzini 
2605b8bb359SPaolo Bonzini /**
261737e150eSPaolo Bonzini  * aio_bh_new: Allocate a new bottom half structure.
262737e150eSPaolo Bonzini  *
263737e150eSPaolo Bonzini  * Bottom halves are lightweight callbacks whose invocation is guaranteed
264737e150eSPaolo Bonzini  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
265737e150eSPaolo Bonzini  * is opaque and must be allocated prior to its use.
266737e150eSPaolo Bonzini  */
267737e150eSPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
268737e150eSPaolo Bonzini 
269737e150eSPaolo Bonzini /**
270737e150eSPaolo Bonzini  * aio_notify: Force processing of pending events.
271737e150eSPaolo Bonzini  *
272737e150eSPaolo Bonzini  * Similar to signaling a condition variable, aio_notify forces
273722f8d90SYaowei Bai  * aio_poll to exit, so that the next call will re-examine pending events.
274722f8d90SYaowei Bai  * The caller of aio_notify will usually call aio_poll again very soon,
275737e150eSPaolo Bonzini  * or go through another iteration of the GLib main loop.  Hence, aio_notify
276737e150eSPaolo Bonzini  * also has the side effect of recalculating the sets of file descriptors
277737e150eSPaolo Bonzini  * that the main loop waits for.
278737e150eSPaolo Bonzini  *
279737e150eSPaolo Bonzini  * Calling aio_notify is rarely necessary, because for example scheduling
280737e150eSPaolo Bonzini  * a bottom half calls it already.
281737e150eSPaolo Bonzini  */
282737e150eSPaolo Bonzini void aio_notify(AioContext *ctx);
283737e150eSPaolo Bonzini 
284737e150eSPaolo Bonzini /**
28505e514b1SPaolo Bonzini  * aio_notify_accept: Acknowledge receiving an aio_notify.
28605e514b1SPaolo Bonzini  *
28705e514b1SPaolo Bonzini  * aio_notify() uses an EventNotifier in order to wake up a sleeping
28805e514b1SPaolo Bonzini  * aio_poll() or g_main_context_iteration().  Calls to aio_notify() are
28905e514b1SPaolo Bonzini  * usually rare, but the AioContext has to clear the EventNotifier on
29005e514b1SPaolo Bonzini  * every aio_poll() or g_main_context_iteration() in order to avoid
29105e514b1SPaolo Bonzini  * busy waiting.  This event_notifier_test_and_clear() cannot be done
29205e514b1SPaolo Bonzini  * using the usual aio_context_set_event_notifier(), because it must
29305e514b1SPaolo Bonzini  * be done before processing all events (file descriptors, bottom halves,
29405e514b1SPaolo Bonzini  * timers).
29505e514b1SPaolo Bonzini  *
29605e514b1SPaolo Bonzini  * aio_notify_accept() is an optimized event_notifier_test_and_clear()
29705e514b1SPaolo Bonzini  * that is specific to an AioContext's notifier; it is used internally
29805e514b1SPaolo Bonzini  * to clear the EventNotifier only if aio_notify() had been called.
29905e514b1SPaolo Bonzini  */
30005e514b1SPaolo Bonzini void aio_notify_accept(AioContext *ctx);
30105e514b1SPaolo Bonzini 
30205e514b1SPaolo Bonzini /**
303df281b80SPavel Dovgalyuk  * aio_bh_call: Executes callback function of the specified BH.
304df281b80SPavel Dovgalyuk  */
305df281b80SPavel Dovgalyuk void aio_bh_call(QEMUBH *bh);
306df281b80SPavel Dovgalyuk 
307df281b80SPavel Dovgalyuk /**
308737e150eSPaolo Bonzini  * aio_bh_poll: Poll bottom halves for an AioContext.
309737e150eSPaolo Bonzini  *
310737e150eSPaolo Bonzini  * These are internal functions used by the QEMU main loop.
311dcc772e2SLiu Ping Fan  * Note that multiple invocations of aio_bh_poll must not run
312dcc772e2SLiu Ping Fan  * concurrently.
313737e150eSPaolo Bonzini  */
314737e150eSPaolo Bonzini int aio_bh_poll(AioContext *ctx);
315737e150eSPaolo Bonzini 
316737e150eSPaolo Bonzini /**
317737e150eSPaolo Bonzini  * qemu_bh_schedule: Schedule a bottom half.
318737e150eSPaolo Bonzini  *
319737e150eSPaolo Bonzini  * Scheduling a bottom half interrupts the main loop and causes the
320737e150eSPaolo Bonzini  * execution of the callback that was passed to qemu_bh_new.
321737e150eSPaolo Bonzini  *
322737e150eSPaolo Bonzini  * Bottom halves that are scheduled from a bottom half handler are instantly
323737e150eSPaolo Bonzini  * invoked.  This can create an infinite loop if a bottom half handler
324737e150eSPaolo Bonzini  * schedules itself.
325737e150eSPaolo Bonzini  *
326737e150eSPaolo Bonzini  * @bh: The bottom half to be scheduled.
327737e150eSPaolo Bonzini  */
328737e150eSPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh);
329737e150eSPaolo Bonzini 
330737e150eSPaolo Bonzini /**
331737e150eSPaolo Bonzini  * qemu_bh_cancel: Cancel execution of a bottom half.
332737e150eSPaolo Bonzini  *
333737e150eSPaolo Bonzini  * Canceling execution of a bottom half undoes the effect of calls to
334737e150eSPaolo Bonzini  * qemu_bh_schedule without freeing its resources yet.  While cancellation
335737e150eSPaolo Bonzini  * itself is also wait-free and thread-safe, it can of course race with the
336737e150eSPaolo Bonzini  * loop that executes bottom halves unless you are holding the iothread
337737e150eSPaolo Bonzini  * mutex.  This makes it mostly useless if you are not holding the mutex.
338737e150eSPaolo Bonzini  *
339737e150eSPaolo Bonzini  * @bh: The bottom half to be canceled.
340737e150eSPaolo Bonzini  */
341737e150eSPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh);
342737e150eSPaolo Bonzini 
343737e150eSPaolo Bonzini /**
344737e150eSPaolo Bonzini  * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
345737e150eSPaolo Bonzini  *
346737e150eSPaolo Bonzini  * Deleting a bottom half frees the memory that was allocated for it by
347737e150eSPaolo Bonzini  * qemu_bh_new.  It also implies canceling the bottom half if it was
348737e150eSPaolo Bonzini  * scheduled.
349dcc772e2SLiu Ping Fan  * This function is asynchronous: the bottom half is not freed immediately;
350dcc772e2SLiu Ping Fan  * the actual deletion is deferred and performed later.
351737e150eSPaolo Bonzini  *
352737e150eSPaolo Bonzini  * @bh: The bottom half to be deleted.
353737e150eSPaolo Bonzini  */
354737e150eSPaolo Bonzini void qemu_bh_delete(QEMUBH *bh);
355737e150eSPaolo Bonzini 
356737e150eSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
357a3462c65SPaolo Bonzini  * attached to the AioContext, before g_poll is invoked.
358a3462c65SPaolo Bonzini  *
359a3462c65SPaolo Bonzini  * This is used internally in the implementation of the GSource.
360a3462c65SPaolo Bonzini  */
361a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx);
362a3462c65SPaolo Bonzini 
363a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
364a3462c65SPaolo Bonzini  * attached to the AioContext, after g_poll is invoked.
365737e150eSPaolo Bonzini  *
366737e150eSPaolo Bonzini  * This is used internally in the implementation of the GSource.
367737e150eSPaolo Bonzini  */
368737e150eSPaolo Bonzini bool aio_pending(AioContext *ctx);
369737e150eSPaolo Bonzini 
370e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext.
371e4c7e2d1SPaolo Bonzini  *
372e4c7e2d1SPaolo Bonzini  * This is used internally in the implementation of the GSource.
373e4c7e2d1SPaolo Bonzini  */
374a153bf52SPaolo Bonzini void aio_dispatch(AioContext *ctx);
375e4c7e2d1SPaolo Bonzini 
376737e150eSPaolo Bonzini /* Make progress in completing AIO work.  This can issue new pending
377737e150eSPaolo Bonzini  * aio as a result of executing I/O completion or bh callbacks.
378737e150eSPaolo Bonzini  *
379acfb23adSPaolo Bonzini  * Return whether any progress was made by executing AIO or bottom half
380acfb23adSPaolo Bonzini  * handlers.  If @blocking == true, this should always be true except
381acfb23adSPaolo Bonzini  * if someone called aio_notify.
382737e150eSPaolo Bonzini  *
383737e150eSPaolo Bonzini  * If there are no pending bottom halves, but there are pending AIO
384737e150eSPaolo Bonzini  * operations, it may not be possible to make any progress without
385737e150eSPaolo Bonzini  * blocking.  If @blocking is true, this function will wait until one
386737e150eSPaolo Bonzini  * or more AIO events have completed, to ensure something has moved
387737e150eSPaolo Bonzini  * before returning.
388737e150eSPaolo Bonzini  */
389737e150eSPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking);
390737e150eSPaolo Bonzini 
391737e150eSPaolo Bonzini /* Register a file descriptor and associated callbacks.  Behaves very similarly
3926484e422SFam Zheng  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
39387f68d31SPaolo Bonzini  * be invoked when using aio_poll().
394737e150eSPaolo Bonzini  *
395737e150eSPaolo Bonzini  * Code that invokes AIO completion functions should rely on this function
396737e150eSPaolo Bonzini  * instead of qemu_set_fd_handler[2].
397737e150eSPaolo Bonzini  */
398737e150eSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx,
399737e150eSPaolo Bonzini                         int fd,
400dca21ef2SFam Zheng                         bool is_external,
401737e150eSPaolo Bonzini                         IOHandler *io_read,
402737e150eSPaolo Bonzini                         IOHandler *io_write,
403f6a51c84SStefan Hajnoczi                         AioPollFn *io_poll,
404737e150eSPaolo Bonzini                         void *opaque);
405737e150eSPaolo Bonzini 
406684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for a file descriptor that has already been
407684e508cSStefan Hajnoczi  * registered with aio_set_fd_handler.  Do nothing if the file descriptor is
408684e508cSStefan Hajnoczi  * not registered.
409684e508cSStefan Hajnoczi  */
410684e508cSStefan Hajnoczi void aio_set_fd_poll(AioContext *ctx, int fd,
411684e508cSStefan Hajnoczi                      IOHandler *io_poll_begin,
412684e508cSStefan Hajnoczi                      IOHandler *io_poll_end);
413684e508cSStefan Hajnoczi 
414737e150eSPaolo Bonzini /* Register an event notifier and associated callbacks.  Behaves very similarly
415737e150eSPaolo Bonzini  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
41687f68d31SPaolo Bonzini  * will be invoked when using aio_poll().
417737e150eSPaolo Bonzini  *
418737e150eSPaolo Bonzini  * Code that invokes AIO completion functions should rely on this function
419737e150eSPaolo Bonzini  * instead of event_notifier_set_handler.
420737e150eSPaolo Bonzini  */
421737e150eSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx,
422737e150eSPaolo Bonzini                             EventNotifier *notifier,
423dca21ef2SFam Zheng                             bool is_external,
424f6a51c84SStefan Hajnoczi                             EventNotifierHandler *io_read,
425f6a51c84SStefan Hajnoczi                             AioPollFn *io_poll);
426737e150eSPaolo Bonzini 
427684e508cSStefan Hajnoczi /* Set polling begin/end callbacks for an event notifier that has already been
428684e508cSStefan Hajnoczi  * registered with aio_set_event_notifier.  Do nothing if the event notifier is
429684e508cSStefan Hajnoczi  * not registered.
430684e508cSStefan Hajnoczi  */
431684e508cSStefan Hajnoczi void aio_set_event_notifier_poll(AioContext *ctx,
432684e508cSStefan Hajnoczi                                  EventNotifier *notifier,
433684e508cSStefan Hajnoczi                                  EventNotifierHandler *io_poll_begin,
434684e508cSStefan Hajnoczi                                  EventNotifierHandler *io_poll_end);
435684e508cSStefan Hajnoczi 
436737e150eSPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached
437737e150eSPaolo Bonzini  * to this AioContext.
438737e150eSPaolo Bonzini  */
439737e150eSPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx);
440737e150eSPaolo Bonzini 
4419b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */
4429b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
4439b34277dSStefan Hajnoczi 
444ed6e2161SNishanth Aravamudan /* Setup the LinuxAioState bound to this AioContext */
445ed6e2161SNishanth Aravamudan struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
446ed6e2161SNishanth Aravamudan 
4470187f5c9SPaolo Bonzini /* Return the LinuxAioState bound to this AioContext */
4480187f5c9SPaolo Bonzini struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
4490187f5c9SPaolo Bonzini 
4506663a0a3SAarushi Mehta /* Setup the LuringState bound to this AioContext */
4516663a0a3SAarushi Mehta struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
4526663a0a3SAarushi Mehta 
4536663a0a3SAarushi Mehta /* Return the LuringState bound to this AioContext */
4546663a0a3SAarushi Mehta struct LuringState *aio_get_linux_io_uring(AioContext *ctx);
4554e29e831SAlex Bligh /**
45689a603a0SArtem Pisarenko  * aio_timer_new_with_attrs:
45789a603a0SArtem Pisarenko  * @ctx: the aio context
45889a603a0SArtem Pisarenko  * @type: the clock type
45989a603a0SArtem Pisarenko  * @scale: the scale
46089a603a0SArtem Pisarenko  * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values
46189a603a0SArtem Pisarenko  *              to assign
46289a603a0SArtem Pisarenko  * @cb: the callback to call on timer expiry
46389a603a0SArtem Pisarenko  * @opaque: the opaque pointer to pass to the callback
46489a603a0SArtem Pisarenko  *
46589a603a0SArtem Pisarenko  * Allocate a new timer (with attributes) attached to the context @ctx.
46689a603a0SArtem Pisarenko  * The function is responsible for memory allocation.
 * The timer is created by calling timer_new_full() on @ctx's timer list
 * group (ctx->tlg).
46789a603a0SArtem Pisarenko  *
46889a603a0SArtem Pisarenko  * The preferred interface is aio_timer_init or aio_timer_init_with_attrs.
46989a603a0SArtem Pisarenko  * Use that unless you really need dynamic memory allocation.
47089a603a0SArtem Pisarenko  *
47189a603a0SArtem Pisarenko  * Returns: a pointer to the new timer
47289a603a0SArtem Pisarenko  */
47389a603a0SArtem Pisarenko static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx,
47489a603a0SArtem Pisarenko                                                   QEMUClockType type,
47589a603a0SArtem Pisarenko                                                   int scale, int attributes,
47689a603a0SArtem Pisarenko                                                   QEMUTimerCB *cb, void *opaque)
47789a603a0SArtem Pisarenko {
47889a603a0SArtem Pisarenko     return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque);
47989a603a0SArtem Pisarenko }
48089a603a0SArtem Pisarenko 
48189a603a0SArtem Pisarenko /**
4824e29e831SAlex Bligh  * aio_timer_new:
4834e29e831SAlex Bligh  * @ctx: the aio context
4844e29e831SAlex Bligh  * @type: the clock type
4854e29e831SAlex Bligh  * @scale: the scale
4864e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
4874e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
4884e29e831SAlex Bligh  *
4894e29e831SAlex Bligh  * Allocate a new timer attached to the context @ctx.
49089a603a0SArtem Pisarenko  * See aio_timer_new_with_attrs for details.
 * This makes the same timer_new_full() call as aio_timer_new_with_attrs,
 * with the attributes argument fixed to 0.
4914e29e831SAlex Bligh  *
4924e29e831SAlex Bligh  * Returns: a pointer to the new timer
4934e29e831SAlex Bligh  */
4944e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
4954e29e831SAlex Bligh                                        int scale,
4964e29e831SAlex Bligh                                        QEMUTimerCB *cb, void *opaque)
4974e29e831SAlex Bligh {
49889a603a0SArtem Pisarenko     return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque);
49989a603a0SArtem Pisarenko }
50089a603a0SArtem Pisarenko 
50189a603a0SArtem Pisarenko /**
50289a603a0SArtem Pisarenko  * aio_timer_init_with_attrs:
50389a603a0SArtem Pisarenko  * @ctx: the aio context
50489a603a0SArtem Pisarenko  * @ts: the timer
50589a603a0SArtem Pisarenko  * @type: the clock type
50689a603a0SArtem Pisarenko  * @scale: the scale
50789a603a0SArtem Pisarenko  * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_<id> values
50889a603a0SArtem Pisarenko  *              to assign
50989a603a0SArtem Pisarenko  * @cb: the callback to call on timer expiry
51089a603a0SArtem Pisarenko  * @opaque: the opaque pointer to pass to the callback
51189a603a0SArtem Pisarenko  *
51289a603a0SArtem Pisarenko  * Initialise a new timer (with attributes) attached to the context @ctx.
51389a603a0SArtem Pisarenko  * The caller is responsible for memory allocation.
 * The timer @ts is set up by calling timer_init_full() with @ctx's timer
 * list group (ctx->tlg).
51489a603a0SArtem Pisarenko  */
51589a603a0SArtem Pisarenko static inline void aio_timer_init_with_attrs(AioContext *ctx,
51689a603a0SArtem Pisarenko                                              QEMUTimer *ts, QEMUClockType type,
51789a603a0SArtem Pisarenko                                              int scale, int attributes,
51889a603a0SArtem Pisarenko                                              QEMUTimerCB *cb, void *opaque)
51989a603a0SArtem Pisarenko {
52089a603a0SArtem Pisarenko     timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque);
5214e29e831SAlex Bligh }
5224e29e831SAlex Bligh 
5234e29e831SAlex Bligh /**
5244e29e831SAlex Bligh  * aio_timer_init:
5254e29e831SAlex Bligh  * @ctx: the aio context
5264e29e831SAlex Bligh  * @ts: the timer
5274e29e831SAlex Bligh  * @type: the clock type
5284e29e831SAlex Bligh  * @scale: the scale
5294e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
5304e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
5314e29e831SAlex Bligh  *
5324e29e831SAlex Bligh  * Initialise a new timer attached to the context @ctx.
53389a603a0SArtem Pisarenko  * See aio_timer_init_with_attrs for details.
5344e29e831SAlex Bligh  */
5354e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx,
5364e29e831SAlex Bligh                                   QEMUTimer *ts, QEMUClockType type,
5374e29e831SAlex Bligh                                   int scale,
5384e29e831SAlex Bligh                                   QEMUTimerCB *cb, void *opaque)
5394e29e831SAlex Bligh {
54089a603a0SArtem Pisarenko     timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque);
5414e29e831SAlex Bligh }
5424e29e831SAlex Bligh 
543845ca10dSPaolo Bonzini /**
544845ca10dSPaolo Bonzini  * aio_compute_timeout:
545845ca10dSPaolo Bonzini  * @ctx: the aio context
546845ca10dSPaolo Bonzini  *
547845ca10dSPaolo Bonzini  * Compute the timeout that a blocking aio_poll should use.
548845ca10dSPaolo Bonzini  */
549845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx);
550845ca10dSPaolo Bonzini 
551c1e1e5faSFam Zheng /**
552c1e1e5faSFam Zheng  * aio_disable_external:
553c1e1e5faSFam Zheng  * @ctx: the aio context
554c1e1e5faSFam Zheng  *
555c1e1e5faSFam Zheng  * Disable the further processing of external clients.
556c1e1e5faSFam Zheng  */
557c1e1e5faSFam Zheng static inline void aio_disable_external(AioContext *ctx)
558c1e1e5faSFam Zheng {
559c1e1e5faSFam Zheng     atomic_inc(&ctx->external_disable_cnt);
560c1e1e5faSFam Zheng }
561c1e1e5faSFam Zheng 
562c1e1e5faSFam Zheng /**
563c1e1e5faSFam Zheng  * aio_enable_external:
564c1e1e5faSFam Zheng  * @ctx: the aio context
565c1e1e5faSFam Zheng  *
566c1e1e5faSFam Zheng  * Enable the processing of external clients.
567c1e1e5faSFam Zheng  */
568c1e1e5faSFam Zheng static inline void aio_enable_external(AioContext *ctx)
569c1e1e5faSFam Zheng {
570321d1dbaSStefan Hajnoczi     int old;
571321d1dbaSStefan Hajnoczi 
572321d1dbaSStefan Hajnoczi     old = atomic_fetch_dec(&ctx->external_disable_cnt);
573321d1dbaSStefan Hajnoczi     assert(old > 0);
574321d1dbaSStefan Hajnoczi     if (old == 1) {
575321d1dbaSStefan Hajnoczi         /* Kick event loop so it re-arms file descriptors */
576321d1dbaSStefan Hajnoczi         aio_notify(ctx);
577321d1dbaSStefan Hajnoczi     }
578c1e1e5faSFam Zheng }
579c1e1e5faSFam Zheng 
580c1e1e5faSFam Zheng /**
5815ceb9e39SFam Zheng  * aio_external_disabled:
5825ceb9e39SFam Zheng  * @ctx: the aio context
5835ceb9e39SFam Zheng  *
5845ceb9e39SFam Zheng  * Return true if the external clients are disabled.
5855ceb9e39SFam Zheng  */
5865ceb9e39SFam Zheng static inline bool aio_external_disabled(AioContext *ctx)
5875ceb9e39SFam Zheng {
5885ceb9e39SFam Zheng     return atomic_read(&ctx->external_disable_cnt);
5895ceb9e39SFam Zheng }
5905ceb9e39SFam Zheng 
5915ceb9e39SFam Zheng /**
592c1e1e5faSFam Zheng  * aio_node_check:
593c1e1e5faSFam Zheng  * @ctx: the aio context
594c1e1e5faSFam Zheng  * @is_external: Whether or not the checked node is an external event source.
595c1e1e5faSFam Zheng  *
596c1e1e5faSFam Zheng  * Check if the node's is_external flag is okay to be polled by the ctx at this
597c1e1e5faSFam Zheng  * moment. True means green light.
598c1e1e5faSFam Zheng  */
599c1e1e5faSFam Zheng static inline bool aio_node_check(AioContext *ctx, bool is_external)
600c1e1e5faSFam Zheng {
601c1e1e5faSFam Zheng     return !is_external || !atomic_read(&ctx->external_disable_cnt);
602c1e1e5faSFam Zheng }
603c1e1e5faSFam Zheng 
60437fcee5dSFam Zheng /**
6050c330a73SPaolo Bonzini  * aio_co_schedule:
6060c330a73SPaolo Bonzini  * @ctx: the aio context
6070c330a73SPaolo Bonzini  * @co: the coroutine
6080c330a73SPaolo Bonzini  *
6090c330a73SPaolo Bonzini  * Start a coroutine on a remote AioContext.
6100c330a73SPaolo Bonzini  *
6110c330a73SPaolo Bonzini  * The coroutine must not be entered by anyone else while aio_co_schedule()
6120c330a73SPaolo Bonzini  * is active.  In addition the coroutine must have yielded unless ctx
6130c330a73SPaolo Bonzini  * is the context in which the coroutine is running (i.e. the value of
6140c330a73SPaolo Bonzini  * qemu_get_current_aio_context() from the coroutine itself).
6150c330a73SPaolo Bonzini  */
6160c330a73SPaolo Bonzini void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
6170c330a73SPaolo Bonzini 
/**
 * aio_co_wake:
 * @co: the coroutine
 *
 * Restart a coroutine on the AioContext where it last ran.  This keeps
 * coroutines from hopping between contexts when they go to sleep.
 *
 * May be called from either coroutine or non-coroutine context.  Nobody
 * else may enter the coroutine while aio_co_wake() is active.
 */
void aio_co_wake(struct Coroutine *co);
6310c330a73SPaolo Bonzini 
6320c330a73SPaolo Bonzini /**
6338865852eSFam Zheng  * aio_co_enter:
6348865852eSFam Zheng  * @ctx: the context to run the coroutine
6358865852eSFam Zheng  * @co: the coroutine to run
6368865852eSFam Zheng  *
6378865852eSFam Zheng  * Enter a coroutine in the specified AioContext.
6388865852eSFam Zheng  */
6398865852eSFam Zheng void aio_co_enter(AioContext *ctx, struct Coroutine *co);
6408865852eSFam Zheng 
6418865852eSFam Zheng /**
642e4370165SPaolo Bonzini  * Return the AioContext whose event loop runs in the current thread.
643e4370165SPaolo Bonzini  *
644e4370165SPaolo Bonzini  * If called from an IOThread this will be the IOThread's AioContext.  If
645e4370165SPaolo Bonzini  * called from another thread it will be the main loop AioContext.
646e4370165SPaolo Bonzini  */
647e4370165SPaolo Bonzini AioContext *qemu_get_current_aio_context(void);
648e4370165SPaolo Bonzini 
649e4370165SPaolo Bonzini /**
650d2b63ba8SStefan Hajnoczi  * in_aio_context_home_thread:
651e4370165SPaolo Bonzini  * @ctx: the aio context
652e4370165SPaolo Bonzini  *
653d2b63ba8SStefan Hajnoczi  * Return whether we are running in the thread that normally runs @ctx.  Note
654d2b63ba8SStefan Hajnoczi  * that acquiring/releasing ctx does not affect the outcome, each AioContext
655d2b63ba8SStefan Hajnoczi  * still only has one home thread that is responsible for running it.
656e4370165SPaolo Bonzini  */
657d2b63ba8SStefan Hajnoczi static inline bool in_aio_context_home_thread(AioContext *ctx)
658e4370165SPaolo Bonzini {
659e4370165SPaolo Bonzini     return ctx == qemu_get_current_aio_context();
660e4370165SPaolo Bonzini }
661e4370165SPaolo Bonzini 
662e4370165SPaolo Bonzini /**
66337fcee5dSFam Zheng  * aio_context_setup:
66437fcee5dSFam Zheng  * @ctx: the aio context
66537fcee5dSFam Zheng  *
66637fcee5dSFam Zheng  * Initialize the aio context.
66737fcee5dSFam Zheng  */
6687e003465SCao jin void aio_context_setup(AioContext *ctx);
66937fcee5dSFam Zheng 
6704a1cba38SStefan Hajnoczi /**
671cd0a6d2bSJie Wang  * aio_context_destroy:
672cd0a6d2bSJie Wang  * @ctx: the aio context
673cd0a6d2bSJie Wang  *
674cd0a6d2bSJie Wang  * Destroy the aio context.
675cd0a6d2bSJie Wang  */
676cd0a6d2bSJie Wang void aio_context_destroy(AioContext *ctx);
677cd0a6d2bSJie Wang 
678cd0a6d2bSJie Wang /**
6794a1cba38SStefan Hajnoczi  * aio_context_set_poll_params:
6804a1cba38SStefan Hajnoczi  * @ctx: the aio context
6814a1cba38SStefan Hajnoczi  * @max_ns: how long to busy poll for, in nanoseconds
68282a41186SStefan Hajnoczi  * @grow: polling time growth factor
68382a41186SStefan Hajnoczi  * @shrink: polling time shrink factor
6844a1cba38SStefan Hajnoczi  *
6854a1cba38SStefan Hajnoczi  * Poll mode can be disabled by setting poll_max_ns to 0.
6864a1cba38SStefan Hajnoczi  */
6874a1cba38SStefan Hajnoczi void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
68882a41186SStefan Hajnoczi                                  int64_t grow, int64_t shrink,
6894a1cba38SStefan Hajnoczi                                  Error **errp);
6904a1cba38SStefan Hajnoczi 
691737e150eSPaolo Bonzini #endif
692