/*
 * QEMU coroutine implementation
 *
 * Copyright IBM, Corp. 2011
 *
 * Authors:
 *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
 *  Kevin Wolf         <kwolf@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
1410817bf0SDaniel P. Berrange
1510817bf0SDaniel P. Berrange #ifndef QEMU_COROUTINE_H
1610817bf0SDaniel P. Berrange #define QEMU_COROUTINE_H
1710817bf0SDaniel P. Berrange
18*68ba85ceSMarkus Armbruster #include "qemu/coroutine-core.h"
1910817bf0SDaniel P. Berrange #include "qemu/queue.h"
2010817bf0SDaniel P. Berrange #include "qemu/timer.h"
2110817bf0SDaniel P. Berrange
/**
 * Coroutines are a mechanism for stack switching and can be used for
 * cooperative userspace threading.  These functions provide a simple but
 * useful flavor of coroutines that is suitable for writing sequential code,
 * rather than callbacks, for operations that need to give up control while
 * waiting for events to complete.
 *
 * These functions are re-entrant and may be used outside the global mutex.
 *
 * Functions that execute in coroutine context cannot be called
 * directly from normal functions.  Use @coroutine_fn to mark such
 * functions.  For example:
 *
 *   static void coroutine_fn foo(void) {
 *       ....
 *   }
 *
 * In the future it would be nice to have the compiler or a static
 * checker catch misuse of such functions.  This annotation might make
 * it possible and in the meantime it serves as documentation.
 */
4310817bf0SDaniel P. Berrange
4410817bf0SDaniel P. Berrange /**
4510817bf0SDaniel P. Berrange * Provides a mutex that can be used to synchronise coroutines
4610817bf0SDaniel P. Berrange */
47fed20a70SPaolo Bonzini struct CoWaitRecord;
48e70372fcSPaolo Bonzini struct CoMutex {
49fed20a70SPaolo Bonzini /* Count of pending lockers; 0 for a free mutex, 1 for an
50fed20a70SPaolo Bonzini * uncontended mutex.
51fed20a70SPaolo Bonzini */
52fed20a70SPaolo Bonzini unsigned locked;
53fed20a70SPaolo Bonzini
54480cff63SPaolo Bonzini /* Context that is holding the lock. Useful to avoid spinning
55480cff63SPaolo Bonzini * when two coroutines on the same AioContext try to get the lock. :)
56480cff63SPaolo Bonzini */
57480cff63SPaolo Bonzini AioContext *ctx;
58480cff63SPaolo Bonzini
59fed20a70SPaolo Bonzini /* A queue of waiters. Elements are added atomically in front of
60fed20a70SPaolo Bonzini * from_push. to_pop is only populated, and popped from, by whoever
61fed20a70SPaolo Bonzini * is in charge of the next wakeup. This can be an unlocker or,
62fed20a70SPaolo Bonzini * through the handoff protocol, a locker that is about to go to sleep.
63fed20a70SPaolo Bonzini */
64fed20a70SPaolo Bonzini QSLIST_HEAD(, CoWaitRecord) from_push, to_pop;
65fed20a70SPaolo Bonzini
66fed20a70SPaolo Bonzini unsigned handoff, sequence;
67fed20a70SPaolo Bonzini
680e438cdcSKevin Wolf Coroutine *holder;
69e70372fcSPaolo Bonzini };
7010817bf0SDaniel P. Berrange
7110817bf0SDaniel P. Berrange /**
72944f3d5dSKevin Wolf * Assert that the current coroutine holds @mutex.
73944f3d5dSKevin Wolf */
qemu_co_mutex_assert_locked(CoMutex * mutex)74944f3d5dSKevin Wolf static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex)
75944f3d5dSKevin Wolf {
76944f3d5dSKevin Wolf /*
77944f3d5dSKevin Wolf * mutex->holder doesn't need any synchronisation if the assertion holds
78944f3d5dSKevin Wolf * true because the mutex protects it. If it doesn't hold true, we still
79944f3d5dSKevin Wolf * don't mind if another thread takes or releases mutex behind our back,
80944f3d5dSKevin Wolf * because the condition will be false no matter whether we read NULL or
81944f3d5dSKevin Wolf * the pointer for any other coroutine.
82944f3d5dSKevin Wolf */
83d73415a3SStefan Hajnoczi assert(qatomic_read(&mutex->locked) &&
84944f3d5dSKevin Wolf mutex->holder == qemu_coroutine_self());
85944f3d5dSKevin Wolf }
86f8c6e1cbSPaolo Bonzini
87f8c6e1cbSPaolo Bonzini /**
88f8c6e1cbSPaolo Bonzini * CoQueues are a mechanism to queue coroutines in order to continue executing
891ace7ceaSPaolo Bonzini * them later. They are similar to condition variables, but they need help
901ace7ceaSPaolo Bonzini * from an external mutex in order to maintain thread-safety.
91f8c6e1cbSPaolo Bonzini */
92f8c6e1cbSPaolo Bonzini typedef struct CoQueue {
93f8c6e1cbSPaolo Bonzini QSIMPLEQ_HEAD(, Coroutine) entries;
94f8c6e1cbSPaolo Bonzini } CoQueue;
95f8c6e1cbSPaolo Bonzini
96f8c6e1cbSPaolo Bonzini /**
97f8c6e1cbSPaolo Bonzini * Initialise a CoQueue. This must be called before any other operation is used
98f8c6e1cbSPaolo Bonzini * on the CoQueue.
99f8c6e1cbSPaolo Bonzini */
100f8c6e1cbSPaolo Bonzini void qemu_co_queue_init(CoQueue *queue);
101f8c6e1cbSPaolo Bonzini
/* Flags accepted by qemu_co_queue_wait_flags(). */
typedef enum {
    /*
     * Enqueue at the front of the queue instead of the back.  Meant for
     * re-queuing a request whose wait condition is still not satisfied
     * after it has been woken up.
     */
    CO_QUEUE_WAIT_FRONT = 1 << 0,
} CoQueueWaitFlags;
1090421b563SStefan Hajnoczi
110f8c6e1cbSPaolo Bonzini /**
111f8c6e1cbSPaolo Bonzini * Adds the current coroutine to the CoQueue and transfers control to the
1121ace7ceaSPaolo Bonzini * caller of the coroutine. The mutex is unlocked during the wait and
1131ace7ceaSPaolo Bonzini * locked again afterwards.
114f8c6e1cbSPaolo Bonzini */
1151a957cf9SPaolo Bonzini #define qemu_co_queue_wait(queue, lock) \
1160421b563SStefan Hajnoczi qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0)
1170421b563SStefan Hajnoczi #define qemu_co_queue_wait_flags(queue, lock, flags) \
1180421b563SStefan Hajnoczi qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags))
1190421b563SStefan Hajnoczi void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
1200421b563SStefan Hajnoczi CoQueueWaitFlags flags);
121f8c6e1cbSPaolo Bonzini
122f8c6e1cbSPaolo Bonzini /**
123248af9e8SPaolo Bonzini * Removes the next coroutine from the CoQueue, and queue it to run after
124248af9e8SPaolo Bonzini * the currently-running coroutine yields.
1255261dd7bSPaolo Bonzini * Returns true if a coroutine was removed, false if the queue is empty.
126248af9e8SPaolo Bonzini * Used from coroutine context, use qemu_co_enter_next outside.
127f8c6e1cbSPaolo Bonzini */
128248af9e8SPaolo Bonzini bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
129f8c6e1cbSPaolo Bonzini
130f8c6e1cbSPaolo Bonzini /**
131f0d43b1eSPaolo Bonzini * Empties the CoQueue and queues the coroutine to run after
132f0d43b1eSPaolo Bonzini * the currently-running coroutine yields.
133f0d43b1eSPaolo Bonzini * Used from coroutine context, use qemu_co_enter_all outside.
134f8c6e1cbSPaolo Bonzini */
135f0d43b1eSPaolo Bonzini void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
136f8c6e1cbSPaolo Bonzini
137f8c6e1cbSPaolo Bonzini /**
1385261dd7bSPaolo Bonzini * Removes the next coroutine from the CoQueue, and wake it up. Unlike
1395261dd7bSPaolo Bonzini * qemu_co_queue_next, this function releases the lock during aio_co_wake
1405261dd7bSPaolo Bonzini * because it is meant to be used outside coroutine context; in that case, the
1415261dd7bSPaolo Bonzini * coroutine is entered immediately, before qemu_co_enter_next returns.
1425261dd7bSPaolo Bonzini *
1435261dd7bSPaolo Bonzini * If used in coroutine context, qemu_co_enter_next is equivalent to
1445261dd7bSPaolo Bonzini * qemu_co_queue_next.
145f8c6e1cbSPaolo Bonzini */
1465261dd7bSPaolo Bonzini #define qemu_co_enter_next(queue, lock) \
1475261dd7bSPaolo Bonzini qemu_co_enter_next_impl(queue, QEMU_MAKE_LOCKABLE(lock))
1485261dd7bSPaolo Bonzini bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock);
149f8c6e1cbSPaolo Bonzini
150f8c6e1cbSPaolo Bonzini /**
151d6ee15adSPaolo Bonzini * Empties the CoQueue, waking the waiting coroutine one at a time. Unlike
152d6ee15adSPaolo Bonzini * qemu_co_queue_all, this function releases the lock during aio_co_wake
153d6ee15adSPaolo Bonzini * because it is meant to be used outside coroutine context; in that case, the
154d6ee15adSPaolo Bonzini * coroutine is entered immediately, before qemu_co_enter_all returns.
155d6ee15adSPaolo Bonzini *
156d6ee15adSPaolo Bonzini * If used in coroutine context, qemu_co_enter_all is equivalent to
157d6ee15adSPaolo Bonzini * qemu_co_queue_all.
158d6ee15adSPaolo Bonzini */
159d6ee15adSPaolo Bonzini #define qemu_co_enter_all(queue, lock) \
160d6ee15adSPaolo Bonzini qemu_co_enter_all_impl(queue, QEMU_MAKE_LOCKABLE(lock))
161d6ee15adSPaolo Bonzini void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock);
162d6ee15adSPaolo Bonzini
163d6ee15adSPaolo Bonzini /**
164f8c6e1cbSPaolo Bonzini * Checks if the CoQueue is empty.
165f8c6e1cbSPaolo Bonzini */
166f8c6e1cbSPaolo Bonzini bool qemu_co_queue_empty(CoQueue *queue);
167f8c6e1cbSPaolo Bonzini
168f8c6e1cbSPaolo Bonzini
169050de36bSPaolo Bonzini typedef struct CoRwTicket CoRwTicket;
17010817bf0SDaniel P. Berrange typedef struct CoRwlock {
171a7b91d35SPaolo Bonzini CoMutex mutex;
172050de36bSPaolo Bonzini
173050de36bSPaolo Bonzini /* Number of readers, or -1 if owned for writing. */
174050de36bSPaolo Bonzini int owners;
175050de36bSPaolo Bonzini
176050de36bSPaolo Bonzini /* Waiting coroutines. */
177050de36bSPaolo Bonzini QSIMPLEQ_HEAD(, CoRwTicket) tickets;
17810817bf0SDaniel P. Berrange } CoRwlock;
17910817bf0SDaniel P. Berrange
18010817bf0SDaniel P. Berrange /**
18110817bf0SDaniel P. Berrange * Initialises a CoRwlock. This must be called before any other operation
18210817bf0SDaniel P. Berrange * is used on the CoRwlock
18310817bf0SDaniel P. Berrange */
18410817bf0SDaniel P. Berrange void qemu_co_rwlock_init(CoRwlock *lock);
18510817bf0SDaniel P. Berrange
18610817bf0SDaniel P. Berrange /**
18710817bf0SDaniel P. Berrange * Read locks the CoRwlock. If the lock cannot be taken immediately because
18810817bf0SDaniel P. Berrange * of a parallel writer, control is transferred to the caller of the current
18910817bf0SDaniel P. Berrange * coroutine.
19010817bf0SDaniel P. Berrange */
191d63f006aSAlberto Faria void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock);
19210817bf0SDaniel P. Berrange
19310817bf0SDaniel P. Berrange /**
194667221c1SPaolo Bonzini * Write Locks the CoRwlock from a reader. This is a bit more efficient than
195667221c1SPaolo Bonzini * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
196050de36bSPaolo Bonzini * Note that if the lock cannot be upgraded immediately, control is transferred
197050de36bSPaolo Bonzini * to the caller of the current coroutine; another writer might run while
198050de36bSPaolo Bonzini * @qemu_co_rwlock_upgrade blocks.
199667221c1SPaolo Bonzini */
200d63f006aSAlberto Faria void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock);
201667221c1SPaolo Bonzini
202667221c1SPaolo Bonzini /**
203667221c1SPaolo Bonzini * Downgrades a write-side critical section to a reader. Downgrading with
204667221c1SPaolo Bonzini * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock
205667221c1SPaolo Bonzini * followed by @qemu_co_rwlock_rdlock. This makes it more efficient, but
206667221c1SPaolo Bonzini * may also sometimes be necessary for correctness.
207667221c1SPaolo Bonzini */
208d63f006aSAlberto Faria void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock);
209667221c1SPaolo Bonzini
210667221c1SPaolo Bonzini /**
21110817bf0SDaniel P. Berrange * Write Locks the mutex. If the lock cannot be taken immediately because
21210817bf0SDaniel P. Berrange * of a parallel reader, control is transferred to the caller of the current
21310817bf0SDaniel P. Berrange * coroutine.
21410817bf0SDaniel P. Berrange */
215d63f006aSAlberto Faria void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock);
21610817bf0SDaniel P. Berrange
21710817bf0SDaniel P. Berrange /**
21810817bf0SDaniel P. Berrange * Unlocks the read/write lock and schedules the next coroutine that was
21910817bf0SDaniel P. Berrange * waiting for this lock to be run.
22010817bf0SDaniel P. Berrange */
221d63f006aSAlberto Faria void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock);
22210817bf0SDaniel P. Berrange
22329a6ea24SPaolo Bonzini typedef struct QemuCoSleep {
22429a6ea24SPaolo Bonzini Coroutine *to_wake;
22529a6ea24SPaolo Bonzini } QemuCoSleep;
2263d692649SVladimir Sementsov-Ogievskiy
22710817bf0SDaniel P. Berrange /**
22829a6ea24SPaolo Bonzini * Yield the coroutine for a given duration. Initializes @w so that,
22929a6ea24SPaolo Bonzini * during this yield, it can be passed to qemu_co_sleep_wake() to
23029a6ea24SPaolo Bonzini * terminate the sleep.
23110817bf0SDaniel P. Berrange */
23229a6ea24SPaolo Bonzini void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
23329a6ea24SPaolo Bonzini QEMUClockType type, int64_t ns);
23429a6ea24SPaolo Bonzini
2350a6f0c76SPaolo Bonzini /**
2360a6f0c76SPaolo Bonzini * Yield the coroutine until the next call to qemu_co_sleep_wake.
2370a6f0c76SPaolo Bonzini */
2380a6f0c76SPaolo Bonzini void coroutine_fn qemu_co_sleep(QemuCoSleep *w);
2390a6f0c76SPaolo Bonzini
qemu_co_sleep_ns(QEMUClockType type,int64_t ns)2403d692649SVladimir Sementsov-Ogievskiy static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
2413d692649SVladimir Sementsov-Ogievskiy {
24229a6ea24SPaolo Bonzini QemuCoSleep w = { 0 };
24329a6ea24SPaolo Bonzini qemu_co_sleep_ns_wakeable(&w, type, ns);
2443d692649SVladimir Sementsov-Ogievskiy }
2453d692649SVladimir Sementsov-Ogievskiy
246e1878eb5SVladimir Sementsov-Ogievskiy typedef void CleanupFunc(void *opaque);
247e1878eb5SVladimir Sementsov-Ogievskiy /**
248e1878eb5SVladimir Sementsov-Ogievskiy * Run entry in a coroutine and start timer. Wait for entry to finish or for
249e1878eb5SVladimir Sementsov-Ogievskiy * timer to elapse, what happen first. If entry finished, return 0, if timer
250e1878eb5SVladimir Sementsov-Ogievskiy * elapsed earlier, return -ETIMEDOUT.
251e1878eb5SVladimir Sementsov-Ogievskiy *
252e1878eb5SVladimir Sementsov-Ogievskiy * Be careful, entry execution is not canceled, user should handle it somehow.
253e1878eb5SVladimir Sementsov-Ogievskiy * If @clean is provided, it's called after coroutine finish if timeout
254e1878eb5SVladimir Sementsov-Ogievskiy * happened.
255e1878eb5SVladimir Sementsov-Ogievskiy */
256e1878eb5SVladimir Sementsov-Ogievskiy int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque,
257e1878eb5SVladimir Sementsov-Ogievskiy uint64_t timeout_ns, CleanupFunc clean);
258e1878eb5SVladimir Sementsov-Ogievskiy
2593d692649SVladimir Sementsov-Ogievskiy /**
2603d692649SVladimir Sementsov-Ogievskiy * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be
2613d692649SVladimir Sementsov-Ogievskiy * deleted. @sleep_state must be the variable whose address was given to
2623d692649SVladimir Sementsov-Ogievskiy * qemu_co_sleep_ns() and should be checked to be non-NULL before calling
2633d692649SVladimir Sementsov-Ogievskiy * qemu_co_sleep_wake().
2643d692649SVladimir Sementsov-Ogievskiy */
26529a6ea24SPaolo Bonzini void qemu_co_sleep_wake(QemuCoSleep *w);
26610817bf0SDaniel P. Berrange
26710817bf0SDaniel P. Berrange /**
26810817bf0SDaniel P. Berrange * Yield until a file descriptor becomes readable
26910817bf0SDaniel P. Berrange *
27010817bf0SDaniel P. Berrange * Note that this function clobbers the handlers for the file descriptor.
27110817bf0SDaniel P. Berrange */
27210817bf0SDaniel P. Berrange void coroutine_fn yield_until_fd_readable(int fd);
27310817bf0SDaniel P. Berrange
/**
 * Increase the coroutine pool size.
 */
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
2784c41c69eSHiroki Narukawa
/**
 * Decrease the coroutine pool size.
 */
void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);
2834c41c69eSHiroki Narukawa
2841a957cf9SPaolo Bonzini #include "qemu/lockable.h"
2851a957cf9SPaolo Bonzini
286c097f1e6SMarc-André Lureau /**
287c097f1e6SMarc-André Lureau * Sends a (part of) iovec down a socket, yielding when the socket is full, or
288c097f1e6SMarc-André Lureau * Receives data into a (part of) iovec from a socket,
289c097f1e6SMarc-André Lureau * yielding when there is no data in the socket.
290c097f1e6SMarc-André Lureau * The same interface as qemu_sendv_recvv(), with added yielding.
291c097f1e6SMarc-André Lureau * XXX should mark these as coroutine_fn
292c097f1e6SMarc-André Lureau */
293512ef174SAlberto Faria ssize_t coroutine_fn qemu_co_sendv_recvv(int sockfd, struct iovec *iov,
294512ef174SAlberto Faria unsigned iov_cnt, size_t offset,
295512ef174SAlberto Faria size_t bytes, bool do_send);
296c097f1e6SMarc-André Lureau #define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
297c097f1e6SMarc-André Lureau qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
298c097f1e6SMarc-André Lureau #define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
299c097f1e6SMarc-André Lureau qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
300c097f1e6SMarc-André Lureau
301c097f1e6SMarc-André Lureau /**
302c097f1e6SMarc-André Lureau * The same as above, but with just a single buffer
303c097f1e6SMarc-André Lureau */
304512ef174SAlberto Faria ssize_t coroutine_fn qemu_co_send_recv(int sockfd, void *buf, size_t bytes,
305512ef174SAlberto Faria bool do_send);
306c097f1e6SMarc-André Lureau #define qemu_co_recv(sockfd, buf, bytes) \
307c097f1e6SMarc-André Lureau qemu_co_send_recv(sockfd, buf, bytes, false)
308c097f1e6SMarc-André Lureau #define qemu_co_send(sockfd, buf, bytes) \
309c097f1e6SMarc-André Lureau qemu_co_send_recv(sockfd, buf, bytes, true)
310c097f1e6SMarc-André Lureau
31110817bf0SDaniel P. Berrange #endif /* QEMU_COROUTINE_H */
312