xref: /openbmc/qemu/include/block/aio.h (revision 8692aa29798e0f2cb5069f2460bbe19ff538fc71)
1 /*
2  * QEMU aio implementation
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #ifndef QEMU_AIO_H
15 #define QEMU_AIO_H
16 
17 #include "qemu-common.h"
18 #include "qemu/queue.h"
19 #include "qemu/event_notifier.h"
20 #include "qemu/thread.h"
21 #include "qemu/rfifolock.h"
22 #include "qemu/timer.h"
23 
24 typedef struct BlockAIOCB BlockAIOCB;
25 typedef void BlockCompletionFunc(void *opaque, int ret);
26 
27 typedef struct AIOCBInfo {
28     void (*cancel_async)(BlockAIOCB *acb);           /* hook: cancel @acb (asynchronously, per its name) */
29     AioContext *(*get_aio_context)(BlockAIOCB *acb); /* hook: return an AioContext for @acb */
30     size_t aiocb_size;   /* presumably the size qemu_aio_get() allocates -- TODO confirm */
31 } AIOCBInfo;
32 
33 struct BlockAIOCB {
34     const AIOCBInfo *aiocb_info; /* hook table and size, see AIOCBInfo */
35     BlockDriverState *bs;
36     BlockCompletionFunc *cb;     /* completion callback, called as cb(opaque, ret) */
37     void *opaque;                /* presumably the first argument given to @cb -- TODO confirm */
38     int refcnt;                  /* presumably managed by qemu_aio_ref()/qemu_aio_unref() -- TODO confirm */
39 };
40 
41 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
42                    BlockCompletionFunc *cb, void *opaque);
43 void qemu_aio_unref(void *p);
44 void qemu_aio_ref(void *p);
45 
46 typedef struct AioHandler AioHandler;
47 typedef void QEMUBHFunc(void *opaque);
48 typedef void IOHandler(void *opaque);
49 
50 struct AioContext {
51     GSource source; /* GLib event source embedded in the context; see aio_get_g_source() */
52 
53     /* Protects all fields from multi-threaded access */
54     RFifoLock lock;
55 
56     /* The list of registered AIO handlers */
57     QLIST_HEAD(, AioHandler) aio_handlers;
58 
59     /* This is a simple lock used to protect the aio_handlers list.
60      * Specifically, it's used to ensure that no callbacks are removed while
61      * we're walking and dispatching callbacks.
62      */
63     int walking_handlers;
64 
65     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
66      * accessed with atomic primitives.  If this field is 0, everything
67      * (file descriptors, bottom halves, timers) will be re-evaluated
68      * before the next blocking poll(), thus the event_notifier_set call
69      * can be skipped.  If it is non-zero, you may need to wake up a
70      * concurrent aio_poll or the glib main event loop, making
71      * event_notifier_set necessary.
72      *
73      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
74      * between a call to aio_ctx_check and the next call to aio_ctx_dispatch.
75      * Bits 1-31 simply count the number of active calls to aio_poll
76      * that are in the prepare or poll phase.
77      *
78      * The GSource and aio_poll must use a different mechanism because
79      * there is no certainty that a call to GSource's prepare callback
80      * (via g_main_context_prepare) is indeed followed by check and
81      * dispatch.  It's not clear whether this would be a bug, but let's
82      * play safe and allow it---it will just cause extra calls to
83      * event_notifier_set until the next call to dispatch.
84      *
85      * Instead, the aio_poll calls include both the prepare and the
86      * dispatch phase, hence a simple counter is enough for them.
87      */
88     uint32_t notify_me;
89 
90     /* lock to protect between bh's adders and deleter */
91     QemuMutex bh_lock;
92 
93     /* Anchor of the list of Bottom Halves belonging to the context */
94     struct QEMUBH *first_bh;
95 
96     /* A simple lock used to protect the first_bh list, and ensure that
97      * no callbacks are removed while we're walking and dispatching callbacks.
98      */
99     int walking_bh;
100 
101     /* Used by aio_notify.
102      *
103      * "notified" is used to avoid expensive event_notifier_test_and_clear
104      * calls.  When it is clear, the EventNotifier is clear, or one thread
105      * is going to clear "notified" before processing more events.  False
106      * positives are possible, i.e. "notified" could be set even though the
107      * EventNotifier is clear.
108      *
109      * Note that event_notifier_set *cannot* be optimized the same way.  For
110      * more information on the problem that would result, see "#ifdef BUG2"
111      * in the docs/aio_notify_accept.promela formal model.
112      */
113     bool notified;
114     EventNotifier notifier;
115 
116     /* Scheduling this BH forces the event loop to iterate */
117     QEMUBH *notify_dummy_bh;
118 
119     /* Thread pool for performing work and receiving completion callbacks */
120     struct ThreadPool *thread_pool;
121 
122     /* TimerLists for calling timers - one per clock type */
123     QEMUTimerListGroup tlg;
124 
125     int external_disable_cnt; /* count of aio_disable_external() calls not yet undone; atomic access */
126 
127     /* epoll(7) state used when built with CONFIG_EPOLL */
128     int epollfd;
129     bool epoll_enabled;
130     bool epoll_available;
131 };
132 
133 /**
134  * aio_context_new: Allocate a new AioContext.
135  *
136  * AioContexts provide a mini event-loop that can be waited on synchronously.
137  * They also provide bottom halves, a service to execute a piece of code
138  * as soon as possible.
139  */
140 AioContext *aio_context_new(Error **errp);
141 
142 /**
143  * aio_context_ref:
144  * @ctx: The AioContext to operate on.
145  *
146  * Add a reference to an AioContext.
147  */
148 void aio_context_ref(AioContext *ctx);
149 
150 /**
151  * aio_context_unref:
152  * @ctx: The AioContext to operate on.
153  *
154  * Drop a reference to an AioContext.
155  */
156 void aio_context_unref(AioContext *ctx);
157 
158 /* Take ownership of the AioContext.  If the AioContext will be shared between
159  * threads, and a thread does not want to be interrupted, it will have to
160  * take ownership around calls to aio_poll().  Otherwise, aio_poll()
161  * automatically takes care of calling aio_context_acquire and
162  * aio_context_release.
163  *
164  * Access to timers and BHs from a thread that has not acquired AioContext
165  * is possible.  Access to callbacks for now must be done while the AioContext
166  * is owned by the thread (FIXME).
167  */
168 void aio_context_acquire(AioContext *ctx);
169 
170 /* Relinquish ownership of the AioContext. */
171 void aio_context_release(AioContext *ctx);
172 
173 /**
174  * aio_bh_new: Allocate a new bottom half structure.
175  *
176  * Bottom halves are lightweight callbacks whose invocation is guaranteed
177  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
178  * is opaque and must be allocated prior to its use.
179  */
180 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
181 
182 /**
183  * aio_notify: Force processing of pending events.
184  *
185  * Similar to signaling a condition variable, aio_notify forces
186  * aio_poll to exit, so that the next call will re-examine pending events.
187  * The caller of aio_notify will usually call aio_poll again very soon,
188  * or go through another iteration of the GLib main loop.  Hence, aio_notify
189  * also has the side effect of recalculating the sets of file descriptors
190  * that the main loop waits for.
191  *
192  * Calling aio_notify is rarely necessary, because for example scheduling
193  * a bottom half calls it already.
194  */
195 void aio_notify(AioContext *ctx);
196 
197 /**
198  * aio_notify_accept: Acknowledge receiving an aio_notify.
199  *
200  * aio_notify() uses an EventNotifier in order to wake up a sleeping
201  * aio_poll() or g_main_context_iteration().  Calls to aio_notify() are
202  * usually rare, but the AioContext has to clear the EventNotifier on
203  * every aio_poll() or g_main_context_iteration() in order to avoid
204  * busy waiting.  This event_notifier_test_and_clear() cannot be done
205  * using the usual aio_set_event_notifier(), because it must
206  * be done before processing all events (file descriptors, bottom halves,
207  * timers).
208  *
209  * aio_notify_accept() is an optimized event_notifier_test_and_clear()
210  * that is specific to an AioContext's notifier; it is used internally
211  * to clear the EventNotifier only if aio_notify() had been called.
212  */
213 void aio_notify_accept(AioContext *ctx);
214 
215 /**
216  * aio_bh_call: Executes callback function of the specified BH.
217  */
218 void aio_bh_call(QEMUBH *bh);
219 
220 /**
221  * aio_bh_poll: Poll bottom halves for an AioContext.
222  *
223  * These are internal functions used by the QEMU main loop.
224  * Note that multiple invocations of aio_bh_poll cannot
225  * be made concurrently.
226  */
227 int aio_bh_poll(AioContext *ctx);
228 
229 /**
230  * qemu_bh_schedule: Schedule a bottom half.
231  *
232  * Scheduling a bottom half interrupts the main loop and causes the
233  * execution of the callback that was passed to qemu_bh_new.
234  *
235  * Bottom halves that are scheduled from a bottom half handler are instantly
236  * invoked.  This can create an infinite loop if a bottom half handler
237  * schedules itself.
238  *
239  * @bh: The bottom half to be scheduled.
240  */
241 void qemu_bh_schedule(QEMUBH *bh);
242 
243 /**
244  * qemu_bh_cancel: Cancel execution of a bottom half.
245  *
246  * Canceling execution of a bottom half undoes the effect of calls to
247  * qemu_bh_schedule without freeing its resources yet.  While cancellation
248  * itself is also wait-free and thread-safe, it can of course race with the
249  * loop that executes bottom halves unless you are holding the iothread
250  * mutex.  This makes it mostly useless if you are not holding the mutex.
251  *
252  * @bh: The bottom half to be canceled.
253  */
254 void qemu_bh_cancel(QEMUBH *bh);
255 
256 /**
257  * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
258  *
259  * Deleting a bottom half frees the memory that was allocated for it by
260  * qemu_bh_new.  It also implies canceling the bottom half if it was
261  * scheduled.
262  * This function is asynchronous.  The actual deletion of the bottom half
263  * is performed later, at the very end.
264  *
265  * @bh: The bottom half to be deleted.
266  */
267 void qemu_bh_delete(QEMUBH *bh);
268 
269 /* Return whether there are any pending callbacks from the GSource
270  * attached to the AioContext, before g_poll is invoked.
271  *
272  * This is used internally in the implementation of the GSource.
273  */
274 bool aio_prepare(AioContext *ctx);
275 
276 /* Return whether there are any pending callbacks from the GSource
277  * attached to the AioContext, after g_poll is invoked.
278  *
279  * This is used internally in the implementation of the GSource.
280  */
281 bool aio_pending(AioContext *ctx);
282 
283 /* Dispatch any pending callbacks from the GSource attached to the AioContext.
284  *
285  * This is used internally in the implementation of the GSource.
286  */
287 bool aio_dispatch(AioContext *ctx);
288 
289 /* Make progress in completing pending AIO work.  This can issue new pending
290  * aio as a result of executing I/O completion or bh callbacks.
291  *
292  * Return whether any progress was made by executing AIO or bottom half
293  * handlers.  If @blocking == true, this should always be true except
294  * if someone called aio_notify.
295  *
296  * If there are no pending bottom halves, but there are pending AIO
297  * operations, it may not be possible to make any progress without
298  * blocking.  If @blocking is true, this function will wait until one
299  * or more AIO events have completed, to ensure something has moved
300  * before returning.
301  */
302 bool aio_poll(AioContext *ctx, bool blocking);
303 
304 /* Register a file descriptor and associated callbacks.  Behaves very similarly
305  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
306  * be invoked when using aio_poll().
307  *
308  * Code that invokes AIO completion functions should rely on this function
309  * instead of qemu_set_fd_handler[2].
310  */
311 void aio_set_fd_handler(AioContext *ctx,
312                         int fd,
313                         bool is_external,
314                         IOHandler *io_read,
315                         IOHandler *io_write,
316                         void *opaque);
317 
318 /* Register an event notifier and associated callbacks.  Behaves very similarly
319  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
320  * will be invoked when using aio_poll().
321  *
322  * Code that invokes AIO completion functions should rely on this function
323  * instead of event_notifier_set_handler.
324  */
325 void aio_set_event_notifier(AioContext *ctx,
326                             EventNotifier *notifier,
327                             bool is_external,
328                             EventNotifierHandler *io_read);
329 
330 /* Return a GSource that lets the main loop poll the file descriptors attached
331  * to this AioContext.
332  */
333 GSource *aio_get_g_source(AioContext *ctx);
334 
335 /* Return the ThreadPool bound to this AioContext */
336 struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
337 
338 /**
339  * aio_timer_new:
340  * @ctx: the aio context whose timer lists are used
341  * @type: the clock type; selects which of the context's timer lists is used
342  * @scale: the scale
343  * @cb: the callback to call on timer expiry
344  * @opaque: the opaque pointer to pass to the callback
345  *
346  * Create a new timer on the timer list of @ctx that matches @type.
347  * Storage for the timer is obtained by this function, not by the caller.
348  *
349  * Prefer aio_timer_init; use this only when dynamic memory allocation
350  * is really needed.
351  *
352  * Returns: a pointer to the new timer
353  */
354 static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
355                                        int scale, QEMUTimerCB *cb, void *opaque)
356 {
357     QEMUTimer *timer = timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
358     return timer;
359 }
360 
361 /**
362  * aio_timer_init:
363  * @ctx: the aio context
364  * @ts: the timer to initialise
365  * @type: the clock type
366  * @scale: the scale
367  * @cb: the callback to call on timer expiry
368  * @opaque: the opaque pointer to pass to the callback
369  *
370  * Set up @ts as a timer attached to the context @ctx.  Memory for @ts
371  * must be provided by the caller.
372  */
373 static inline void aio_timer_init(AioContext *ctx, QEMUTimer *ts,
374                                   QEMUClockType type, int scale,
375                                   QEMUTimerCB *cb, void *opaque)
376 {
377     /* tlg holds one timer list per clock type; use the one for @type */
378     timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
379 }
380 
381 /**
382  * aio_compute_timeout:
383  * @ctx: the aio context
384  *
385  * Compute the timeout that a blocking aio_poll should use.
386  */
387 int64_t aio_compute_timeout(AioContext *ctx);
388 
389 /**
390  * aio_disable_external:
391  * @ctx: the aio context
392  *
393  * Disable the further processing of external clients (calls may be nested).
394  */
395 static inline void aio_disable_external(AioContext *ctx)
396 {
397     atomic_inc(&ctx->external_disable_cnt); /* undone by aio_enable_external() */
398 }
399 
400 /**
401  * aio_enable_external:
402  * @ctx: the aio context
403  *
404  * Enable the processing of external clients (undoes one disable call).
405  */
406 static inline void aio_enable_external(AioContext *ctx)
407 {
408     int old = atomic_fetch_dec(&ctx->external_disable_cnt);
409     assert(old > 0); /* value fetched by the decrement itself: no racy re-read */
410 }
411 
412 /**
413  * aio_external_disabled:
414  * @ctx: the aio context
415  *
416  * Returns: true while at least one aio_disable_external() call is in effect.
417  */
418 static inline bool aio_external_disabled(AioContext *ctx)
419 {
420     return atomic_read(&ctx->external_disable_cnt) != 0;
421 }
422 
423 /**
424  * aio_node_check:
425  * @ctx: the aio context
426  * @is_external: Whether or not the checked node is an external event source.
427  *
428  * Returns: true if a node with this @is_external flag may be polled by @ctx
429  * right now, i.e. it is not external or external clients are not disabled.
430  */
431 static inline bool aio_node_check(AioContext *ctx, bool is_external)
432 {
433     return !(is_external && atomic_read(&ctx->external_disable_cnt));
434 }
435 
436 /**
437  * aio_context_setup:
438  * @ctx: the aio context
439  *
440  * Initialize the aio context.
441  */
442 void aio_context_setup(AioContext *ctx, Error **errp);
443 
444 #endif
445