xref: /openbmc/qemu/include/block/aio.h (revision eabc977973103527bbb8fed69c91cfaa6691f8ab)
1737e150eSPaolo Bonzini /*
2737e150eSPaolo Bonzini  * QEMU aio implementation
3737e150eSPaolo Bonzini  *
4737e150eSPaolo Bonzini  * Copyright IBM, Corp. 2008
5737e150eSPaolo Bonzini  *
6737e150eSPaolo Bonzini  * Authors:
7737e150eSPaolo Bonzini  *  Anthony Liguori   <aliguori@us.ibm.com>
8737e150eSPaolo Bonzini  *
9737e150eSPaolo Bonzini  * This work is licensed under the terms of the GNU GPL, version 2.  See
10737e150eSPaolo Bonzini  * the COPYING file in the top-level directory.
11737e150eSPaolo Bonzini  *
12737e150eSPaolo Bonzini  */
13737e150eSPaolo Bonzini 
14737e150eSPaolo Bonzini #ifndef QEMU_AIO_H
15737e150eSPaolo Bonzini #define QEMU_AIO_H
16737e150eSPaolo Bonzini 
176a1751b7SAlex Bligh #include "qemu/typedefs.h"
18737e150eSPaolo Bonzini #include "qemu-common.h"
191de7afc9SPaolo Bonzini #include "qemu/queue.h"
201de7afc9SPaolo Bonzini #include "qemu/event_notifier.h"
21dcc772e2SLiu Ping Fan #include "qemu/thread.h"
2298563fc3SStefan Hajnoczi #include "qemu/rfifolock.h"
23dae21b98SAlex Bligh #include "qemu/timer.h"
24737e150eSPaolo Bonzini 
257c84b1b8SMarkus Armbruster typedef struct BlockAIOCB BlockAIOCB;
26097310b5SMarkus Armbruster typedef void BlockCompletionFunc(void *opaque, int ret);
27737e150eSPaolo Bonzini 
28737e150eSPaolo Bonzini typedef struct AIOCBInfo {
297c84b1b8SMarkus Armbruster     void (*cancel_async)(BlockAIOCB *acb);
307c84b1b8SMarkus Armbruster     AioContext *(*get_aio_context)(BlockAIOCB *acb);
31737e150eSPaolo Bonzini     size_t aiocb_size;
32737e150eSPaolo Bonzini } AIOCBInfo;
33737e150eSPaolo Bonzini 
347c84b1b8SMarkus Armbruster struct BlockAIOCB {
35737e150eSPaolo Bonzini     const AIOCBInfo *aiocb_info;
36737e150eSPaolo Bonzini     BlockDriverState *bs;
37097310b5SMarkus Armbruster     BlockCompletionFunc *cb;
38737e150eSPaolo Bonzini     void *opaque;
39f197fe2bSFam Zheng     int refcnt;
40737e150eSPaolo Bonzini };
41737e150eSPaolo Bonzini 
42737e150eSPaolo Bonzini void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
43097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque);
448007429aSFam Zheng void qemu_aio_unref(void *p);
45f197fe2bSFam Zheng void qemu_aio_ref(void *p);
46737e150eSPaolo Bonzini 
47737e150eSPaolo Bonzini typedef struct AioHandler AioHandler;
48737e150eSPaolo Bonzini typedef void QEMUBHFunc(void *opaque);
49737e150eSPaolo Bonzini typedef void IOHandler(void *opaque);
50737e150eSPaolo Bonzini 
516a1751b7SAlex Bligh struct AioContext {
52737e150eSPaolo Bonzini     GSource source;
53737e150eSPaolo Bonzini 
5498563fc3SStefan Hajnoczi     /* Protects all fields from multi-threaded access */
5598563fc3SStefan Hajnoczi     RFifoLock lock;
5698563fc3SStefan Hajnoczi 
57737e150eSPaolo Bonzini     /* The list of registered AIO handlers */
58737e150eSPaolo Bonzini     QLIST_HEAD(, AioHandler) aio_handlers;
59737e150eSPaolo Bonzini 
60737e150eSPaolo Bonzini     /* This is a simple lock used to protect the aio_handlers list.
61737e150eSPaolo Bonzini      * Specifically, it's used to ensure that no callbacks are removed while
62737e150eSPaolo Bonzini      * we're walking and dispatching callbacks.
63737e150eSPaolo Bonzini      */
64737e150eSPaolo Bonzini     int walking_handlers;
65737e150eSPaolo Bonzini 
66*eabc9779SPaolo Bonzini     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
67*eabc9779SPaolo Bonzini      * accessed with atomic primitives.  If this field is 0, everything
68*eabc9779SPaolo Bonzini      * (file descriptors, bottom halves, timers) will be re-evaluated
69*eabc9779SPaolo Bonzini      * before the next blocking poll(), thus the event_notifier_set call
70*eabc9779SPaolo Bonzini      * can be skipped.  If it is non-zero, you may need to wake up a
71*eabc9779SPaolo Bonzini      * concurrent aio_poll or the glib main event loop, making
72*eabc9779SPaolo Bonzini      * event_notifier_set necessary.
73*eabc9779SPaolo Bonzini      *
74*eabc9779SPaolo Bonzini      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
75*eabc9779SPaolo Bonzini      * between a call to aio_ctx_check and the next call to aio_ctx_dispatch.
76*eabc9779SPaolo Bonzini      * Bits 1-31 simply count the number of active calls to aio_poll
77*eabc9779SPaolo Bonzini      * that are in the prepare or poll phase.
78*eabc9779SPaolo Bonzini      *
79*eabc9779SPaolo Bonzini      * The GSource and aio_poll must use a different mechanism because
80*eabc9779SPaolo Bonzini      * there is no certainty that a call to GSource's prepare callback
81*eabc9779SPaolo Bonzini      * (via g_main_context_prepare) is indeed followed by check and
82*eabc9779SPaolo Bonzini      * dispatch.  It's not clear whether this would be a bug, but let's
83*eabc9779SPaolo Bonzini      * play safe and allow it---it will just cause extra calls to
84*eabc9779SPaolo Bonzini      * event_notifier_set until the next call to dispatch.
85*eabc9779SPaolo Bonzini      *
86*eabc9779SPaolo Bonzini      * Instead, the aio_poll calls include both the prepare and the
87*eabc9779SPaolo Bonzini      * dispatch phase, hence a simple counter is enough for them.
880ceb849bSPaolo Bonzini      */
89*eabc9779SPaolo Bonzini     uint32_t notify_me;
900ceb849bSPaolo Bonzini 
91dcc772e2SLiu Ping Fan     /* lock to protect between bh's adders and deleter */
92dcc772e2SLiu Ping Fan     QemuMutex bh_lock;
930ceb849bSPaolo Bonzini 
94737e150eSPaolo Bonzini     /* Anchor of the list of Bottom Halves belonging to the context */
95737e150eSPaolo Bonzini     struct QEMUBH *first_bh;
96737e150eSPaolo Bonzini 
97737e150eSPaolo Bonzini     /* A simple lock used to protect the first_bh list, and ensure that
98737e150eSPaolo Bonzini      * no callbacks are removed while we're walking and dispatching callbacks.
99737e150eSPaolo Bonzini      */
100737e150eSPaolo Bonzini     int walking_bh;
101737e150eSPaolo Bonzini 
102737e150eSPaolo Bonzini     /* Used for aio_notify.  */
103737e150eSPaolo Bonzini     EventNotifier notifier;
1046b5f8762SStefan Hajnoczi 
1059b34277dSStefan Hajnoczi     /* Thread pool for performing work and receiving completion callbacks */
1069b34277dSStefan Hajnoczi     struct ThreadPool *thread_pool;
107dae21b98SAlex Bligh 
108dae21b98SAlex Bligh     /* TimerLists for calling timers - one per clock type */
109dae21b98SAlex Bligh     QEMUTimerListGroup tlg;
1106a1751b7SAlex Bligh };
111737e150eSPaolo Bonzini 
112737e150eSPaolo Bonzini /**
113737e150eSPaolo Bonzini  * aio_context_new: Allocate a new AioContext.
114737e150eSPaolo Bonzini  *
 * AioContexts provide a mini event loop that can be waited on synchronously.
116737e150eSPaolo Bonzini  * They also provide bottom halves, a service to execute a piece of code
117737e150eSPaolo Bonzini  * as soon as possible.
118737e150eSPaolo Bonzini  */
1192f78e491SChrysostomos Nanakos AioContext *aio_context_new(Error **errp);
120737e150eSPaolo Bonzini 
121737e150eSPaolo Bonzini /**
122737e150eSPaolo Bonzini  * aio_context_ref:
123737e150eSPaolo Bonzini  * @ctx: The AioContext to operate on.
124737e150eSPaolo Bonzini  *
125737e150eSPaolo Bonzini  * Add a reference to an AioContext.
126737e150eSPaolo Bonzini  */
127737e150eSPaolo Bonzini void aio_context_ref(AioContext *ctx);
128737e150eSPaolo Bonzini 
129737e150eSPaolo Bonzini /**
130737e150eSPaolo Bonzini  * aio_context_unref:
131737e150eSPaolo Bonzini  * @ctx: The AioContext to operate on.
132737e150eSPaolo Bonzini  *
133737e150eSPaolo Bonzini  * Drop a reference to an AioContext.
134737e150eSPaolo Bonzini  */
135737e150eSPaolo Bonzini void aio_context_unref(AioContext *ctx);
136737e150eSPaolo Bonzini 
13798563fc3SStefan Hajnoczi /* Take ownership of the AioContext.  If the AioContext will be shared between
13849110174SPaolo Bonzini  * threads, and a thread does not want to be interrupted, it will have to
13949110174SPaolo Bonzini  * take ownership around calls to aio_poll().  Otherwise, aio_poll()
14049110174SPaolo Bonzini  * automatically takes care of calling aio_context_acquire and
14149110174SPaolo Bonzini  * aio_context_release.
14298563fc3SStefan Hajnoczi  *
14349110174SPaolo Bonzini  * Access to timers and BHs from a thread that has not acquired AioContext
14449110174SPaolo Bonzini  * is possible.  Access to callbacks for now must be done while the AioContext
14549110174SPaolo Bonzini  * is owned by the thread (FIXME).
14698563fc3SStefan Hajnoczi  */
14798563fc3SStefan Hajnoczi void aio_context_acquire(AioContext *ctx);
14898563fc3SStefan Hajnoczi 
14998563fc3SStefan Hajnoczi /* Relinquish ownership of the AioContext. */
15098563fc3SStefan Hajnoczi void aio_context_release(AioContext *ctx);
15198563fc3SStefan Hajnoczi 
152737e150eSPaolo Bonzini /**
153737e150eSPaolo Bonzini  * aio_bh_new: Allocate a new bottom half structure.
154737e150eSPaolo Bonzini  *
155737e150eSPaolo Bonzini  * Bottom halves are lightweight callbacks whose invocation is guaranteed
156737e150eSPaolo Bonzini  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
157737e150eSPaolo Bonzini  * is opaque and must be allocated prior to its use.
158737e150eSPaolo Bonzini  */
159737e150eSPaolo Bonzini QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
160737e150eSPaolo Bonzini 
161737e150eSPaolo Bonzini /**
162737e150eSPaolo Bonzini  * aio_notify: Force processing of pending events.
163737e150eSPaolo Bonzini  *
 * Similar to signaling a condition variable, aio_notify forces
 * aio_poll to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_poll again very soon,
167737e150eSPaolo Bonzini  * or go through another iteration of the GLib main loop.  Hence, aio_notify
168737e150eSPaolo Bonzini  * also has the side effect of recalculating the sets of file descriptors
169737e150eSPaolo Bonzini  * that the main loop waits for.
170737e150eSPaolo Bonzini  *
171737e150eSPaolo Bonzini  * Calling aio_notify is rarely necessary, because for example scheduling
172737e150eSPaolo Bonzini  * a bottom half calls it already.
173737e150eSPaolo Bonzini  */
174737e150eSPaolo Bonzini void aio_notify(AioContext *ctx);
175737e150eSPaolo Bonzini 
176737e150eSPaolo Bonzini /**
177737e150eSPaolo Bonzini  * aio_bh_poll: Poll bottom halves for an AioContext.
178737e150eSPaolo Bonzini  *
 * This is an internal function used by the QEMU main loop.
 * Note that multiple invocations of aio_bh_poll must not
 * run concurrently.
182737e150eSPaolo Bonzini  */
183737e150eSPaolo Bonzini int aio_bh_poll(AioContext *ctx);
184737e150eSPaolo Bonzini 
185737e150eSPaolo Bonzini /**
186737e150eSPaolo Bonzini  * qemu_bh_schedule: Schedule a bottom half.
187737e150eSPaolo Bonzini  *
188737e150eSPaolo Bonzini  * Scheduling a bottom half interrupts the main loop and causes the
189737e150eSPaolo Bonzini  * execution of the callback that was passed to qemu_bh_new.
190737e150eSPaolo Bonzini  *
191737e150eSPaolo Bonzini  * Bottom halves that are scheduled from a bottom half handler are instantly
192737e150eSPaolo Bonzini  * invoked.  This can create an infinite loop if a bottom half handler
193737e150eSPaolo Bonzini  * schedules itself.
194737e150eSPaolo Bonzini  *
195737e150eSPaolo Bonzini  * @bh: The bottom half to be scheduled.
196737e150eSPaolo Bonzini  */
197737e150eSPaolo Bonzini void qemu_bh_schedule(QEMUBH *bh);
198737e150eSPaolo Bonzini 
199737e150eSPaolo Bonzini /**
200737e150eSPaolo Bonzini  * qemu_bh_cancel: Cancel execution of a bottom half.
201737e150eSPaolo Bonzini  *
202737e150eSPaolo Bonzini  * Canceling execution of a bottom half undoes the effect of calls to
203737e150eSPaolo Bonzini  * qemu_bh_schedule without freeing its resources yet.  While cancellation
204737e150eSPaolo Bonzini  * itself is also wait-free and thread-safe, it can of course race with the
205737e150eSPaolo Bonzini  * loop that executes bottom halves unless you are holding the iothread
206737e150eSPaolo Bonzini  * mutex.  This makes it mostly useless if you are not holding the mutex.
207737e150eSPaolo Bonzini  *
208737e150eSPaolo Bonzini  * @bh: The bottom half to be canceled.
209737e150eSPaolo Bonzini  */
210737e150eSPaolo Bonzini void qemu_bh_cancel(QEMUBH *bh);
211737e150eSPaolo Bonzini 
212737e150eSPaolo Bonzini /**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
214737e150eSPaolo Bonzini  *
215737e150eSPaolo Bonzini  * Deleting a bottom half frees the memory that was allocated for it by
216737e150eSPaolo Bonzini  * qemu_bh_new.  It also implies canceling the bottom half if it was
217737e150eSPaolo Bonzini  * scheduled.
 * This function is asynchronous. The bottom half will do the delete action
 * at the final end.
220737e150eSPaolo Bonzini  *
221737e150eSPaolo Bonzini  * @bh: The bottom half to be deleted.
222737e150eSPaolo Bonzini  */
223737e150eSPaolo Bonzini void qemu_bh_delete(QEMUBH *bh);
224737e150eSPaolo Bonzini 
225737e150eSPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
226a3462c65SPaolo Bonzini  * attached to the AioContext, before g_poll is invoked.
227a3462c65SPaolo Bonzini  *
228a3462c65SPaolo Bonzini  * This is used internally in the implementation of the GSource.
229a3462c65SPaolo Bonzini  */
230a3462c65SPaolo Bonzini bool aio_prepare(AioContext *ctx);
231a3462c65SPaolo Bonzini 
232a3462c65SPaolo Bonzini /* Return whether there are any pending callbacks from the GSource
233a3462c65SPaolo Bonzini  * attached to the AioContext, after g_poll is invoked.
234737e150eSPaolo Bonzini  *
235737e150eSPaolo Bonzini  * This is used internally in the implementation of the GSource.
236737e150eSPaolo Bonzini  */
237737e150eSPaolo Bonzini bool aio_pending(AioContext *ctx);
238737e150eSPaolo Bonzini 
239e4c7e2d1SPaolo Bonzini /* Dispatch any pending callbacks from the GSource attached to the AioContext.
240e4c7e2d1SPaolo Bonzini  *
241e4c7e2d1SPaolo Bonzini  * This is used internally in the implementation of the GSource.
242e4c7e2d1SPaolo Bonzini  */
243e4c7e2d1SPaolo Bonzini bool aio_dispatch(AioContext *ctx);
244e4c7e2d1SPaolo Bonzini 
/* Make progress in completing AIO work.  This can issue new pending
246737e150eSPaolo Bonzini  * aio as a result of executing I/O completion or bh callbacks.
247737e150eSPaolo Bonzini  *
248acfb23adSPaolo Bonzini  * Return whether any progress was made by executing AIO or bottom half
249acfb23adSPaolo Bonzini  * handlers.  If @blocking == true, this should always be true except
250acfb23adSPaolo Bonzini  * if someone called aio_notify.
251737e150eSPaolo Bonzini  *
252737e150eSPaolo Bonzini  * If there are no pending bottom halves, but there are pending AIO
253737e150eSPaolo Bonzini  * operations, it may not be possible to make any progress without
254737e150eSPaolo Bonzini  * blocking.  If @blocking is true, this function will wait until one
255737e150eSPaolo Bonzini  * or more AIO events have completed, to ensure something has moved
256737e150eSPaolo Bonzini  * before returning.
257737e150eSPaolo Bonzini  */
258737e150eSPaolo Bonzini bool aio_poll(AioContext *ctx, bool blocking);
259737e150eSPaolo Bonzini 
260737e150eSPaolo Bonzini /* Register a file descriptor and associated callbacks.  Behaves very similarly
2616484e422SFam Zheng  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
26287f68d31SPaolo Bonzini  * be invoked when using aio_poll().
263737e150eSPaolo Bonzini  *
264737e150eSPaolo Bonzini  * Code that invokes AIO completion functions should rely on this function
265737e150eSPaolo Bonzini  * instead of qemu_set_fd_handler[2].
266737e150eSPaolo Bonzini  */
267737e150eSPaolo Bonzini void aio_set_fd_handler(AioContext *ctx,
268737e150eSPaolo Bonzini                         int fd,
269737e150eSPaolo Bonzini                         IOHandler *io_read,
270737e150eSPaolo Bonzini                         IOHandler *io_write,
271737e150eSPaolo Bonzini                         void *opaque);
272737e150eSPaolo Bonzini 
273737e150eSPaolo Bonzini /* Register an event notifier and associated callbacks.  Behaves very similarly
274737e150eSPaolo Bonzini  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
27587f68d31SPaolo Bonzini  * will be invoked when using aio_poll().
276737e150eSPaolo Bonzini  *
277737e150eSPaolo Bonzini  * Code that invokes AIO completion functions should rely on this function
278737e150eSPaolo Bonzini  * instead of event_notifier_set_handler.
279737e150eSPaolo Bonzini  */
280737e150eSPaolo Bonzini void aio_set_event_notifier(AioContext *ctx,
281737e150eSPaolo Bonzini                             EventNotifier *notifier,
282f2e5dca4SStefan Hajnoczi                             EventNotifierHandler *io_read);
283737e150eSPaolo Bonzini 
284737e150eSPaolo Bonzini /* Return a GSource that lets the main loop poll the file descriptors attached
285737e150eSPaolo Bonzini  * to this AioContext.
286737e150eSPaolo Bonzini  */
287737e150eSPaolo Bonzini GSource *aio_get_g_source(AioContext *ctx);
288737e150eSPaolo Bonzini 
2899b34277dSStefan Hajnoczi /* Return the ThreadPool bound to this AioContext */
2909b34277dSStefan Hajnoczi struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
2919b34277dSStefan Hajnoczi 
2924e29e831SAlex Bligh /**
2934e29e831SAlex Bligh  * aio_timer_new:
2944e29e831SAlex Bligh  * @ctx: the aio context
2954e29e831SAlex Bligh  * @type: the clock type
2964e29e831SAlex Bligh  * @scale: the scale
2974e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
2984e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
2994e29e831SAlex Bligh  *
3004e29e831SAlex Bligh  * Allocate a new timer attached to the context @ctx.
3014e29e831SAlex Bligh  * The function is responsible for memory allocation.
3024e29e831SAlex Bligh  *
3034e29e831SAlex Bligh  * The preferred interface is aio_timer_init. Use that
3044e29e831SAlex Bligh  * unless you really need dynamic memory allocation.
3054e29e831SAlex Bligh  *
3064e29e831SAlex Bligh  * Returns: a pointer to the new timer
3074e29e831SAlex Bligh  */
3084e29e831SAlex Bligh static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
3094e29e831SAlex Bligh                                        int scale,
3104e29e831SAlex Bligh                                        QEMUTimerCB *cb, void *opaque)
3114e29e831SAlex Bligh {
3124e29e831SAlex Bligh     return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
3134e29e831SAlex Bligh }
3144e29e831SAlex Bligh 
3154e29e831SAlex Bligh /**
3164e29e831SAlex Bligh  * aio_timer_init:
3174e29e831SAlex Bligh  * @ctx: the aio context
3184e29e831SAlex Bligh  * @ts: the timer
3194e29e831SAlex Bligh  * @type: the clock type
3204e29e831SAlex Bligh  * @scale: the scale
3214e29e831SAlex Bligh  * @cb: the callback to call on timer expiry
3224e29e831SAlex Bligh  * @opaque: the opaque pointer to pass to the callback
3234e29e831SAlex Bligh  *
3244e29e831SAlex Bligh  * Initialise a new timer attached to the context @ctx.
3254e29e831SAlex Bligh  * The caller is responsible for memory allocation.
3264e29e831SAlex Bligh  */
3274e29e831SAlex Bligh static inline void aio_timer_init(AioContext *ctx,
3284e29e831SAlex Bligh                                   QEMUTimer *ts, QEMUClockType type,
3294e29e831SAlex Bligh                                   int scale,
3304e29e831SAlex Bligh                                   QEMUTimerCB *cb, void *opaque)
3314e29e831SAlex Bligh {
332f186aa97SPaolo Bonzini     timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
3334e29e831SAlex Bligh }
3344e29e831SAlex Bligh 
335845ca10dSPaolo Bonzini /**
336845ca10dSPaolo Bonzini  * aio_compute_timeout:
337845ca10dSPaolo Bonzini  * @ctx: the aio context
338845ca10dSPaolo Bonzini  *
339845ca10dSPaolo Bonzini  * Compute the timeout that a blocking aio_poll should use.
340845ca10dSPaolo Bonzini  */
341845ca10dSPaolo Bonzini int64_t aio_compute_timeout(AioContext *ctx);
342845ca10dSPaolo Bonzini 
343737e150eSPaolo Bonzini #endif
344