/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
#include "qemu/coroutine-core.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
#include "hw/qdev-core.h"


typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);

typedef struct AIOCBInfo {
    void (*cancel_async)(BlockAIOCB *acb);
    size_t aiocb_size;
} AIOCBInfo;

struct BlockAIOCB {
    const AIOCBInfo *aiocb_info;
    BlockDriverState *bs;
    BlockCompletionFunc *cb;
    void *opaque;
    int refcnt;
};

void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque);
void qemu_aio_unref(void *p);
void qemu_aio_ref(void *p);

typedef struct AioHandler AioHandler;
typedef QLIST_HEAD(, AioHandler) AioHandlerList;
typedef void QEMUBHFunc(void *opaque);
typedef bool AioPollFn(void *opaque);
typedef void IOHandler(void *opaque);

struct ThreadPoolAio;
struct LinuxAioState;
typedef struct LuringState LuringState;

/* Is polling disabled? */
bool aio_poll_disabled(AioContext *ctx);

#ifdef CONFIG_LINUX_IO_URING
/*
 * Each io_uring request must have a unique CqeHandler that processes the cqe.
 * The lifetime of a CqeHandler must be at least from aio_add_sqe() until
 * ->cb() invocation.
 */
typedef struct CqeHandler CqeHandler;
struct CqeHandler {
    /* Called by the AioContext when the request has completed */
    void (*cb)(CqeHandler *handler);

    /* Used internally, do not access this */
    QSIMPLEQ_ENTRY(CqeHandler) next;

    /* This field is filled in before ->cb() is called */
    struct io_uring_cqe cqe;
};

typedef QSIMPLEQ_HEAD(, CqeHandler) CqeHandlerSimpleQ;
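
/*
 * A typical pattern (an illustrative sketch, not code from this header) is to
 * embed the CqeHandler in a per-request struct so that ->cb() can recover the
 * request with container_of().  The MyRequest type and my_req_complete() names
 * below are hypothetical:
 *
 *     typedef struct {
 *         CqeHandler cqe_handler;
 *         void *request_state;
 *     } MyRequest;
 *
 *     static void my_req_complete(CqeHandler *handler)
 *     {
 *         MyRequest *req = container_of(handler, MyRequest, cqe_handler);
 *         int ret = handler->cqe.res;
 *
 *         ...complete the request using ret...
 *     }
 *
 * The CqeHandler (and here the whole MyRequest) must stay alive until
 * my_req_complete() has been invoked.
 */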
#endif /* CONFIG_LINUX_IO_URING */

/* Callbacks for file descriptor monitoring implementations */
typedef struct {
    /*
     * update:
     * @ctx: the AioContext
     * @old_node: the existing handler or NULL if this file descriptor is being
     *            monitored for the first time
     * @new_node: the new handler or NULL if this file descriptor is being
     *            removed
     *
     * Add/remove/modify a monitored file descriptor.
     *
     * Called with ctx->list_lock acquired.
     */
    void (*update)(AioContext *ctx, AioHandler *old_node, AioHandler *new_node);

    /*
     * wait:
     * @ctx: the AioContext
     * @ready_list: list for handlers that become ready
     * @timeout: maximum duration to wait, in nanoseconds
     *
     * Wait for file descriptors to become ready and place them on ready_list.
     *
     * Called with ctx->list_lock incremented but not locked.
     *
     * Returns: number of ready file descriptors.
     */
    int (*wait)(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout);

    /*
     * need_wait:
     * @ctx: the AioContext
     *
     * Tell aio_poll() when to stop userspace polling early because ->wait()
     * has fds ready.
     *
     * File descriptor monitoring implementations that cannot poll fd readiness
     * from userspace should use aio_poll_disabled() here.  This ensures that
     * file descriptors are not starved by handlers that frequently make
     * progress via userspace polling.
     *
     * Returns: true if ->wait() should be called, false otherwise.
     */
    bool (*need_wait)(AioContext *ctx);

    /*
     * dispatch:
     * @ctx: the AioContext
     *
     * Dispatch any work that is specific to this file descriptor monitoring
     * implementation.  Usually the event loop's generic file descriptor
     * monitoring, BH, and timer dispatching code is sufficient, but file
     * descriptor monitoring implementations offering additional functionality
     * may need to implement this function for custom behavior.  Called at a
     * point in the event loop when it is safe to invoke user-defined
     * callbacks.
     *
     * This function is optional and may be NULL.
     *
     * Returns: true if progress was made (see aio_poll()'s return value),
     * false otherwise.
     */
    bool (*dispatch)(AioContext *ctx);

    /*
     * gsource_prepare:
     * @ctx: the AioContext
     *
     * Prepare for the glib event loop to wait for events instead of the usual
     * ->wait() call.  See glib's GSourceFuncs->prepare().
     */
    void (*gsource_prepare)(AioContext *ctx);

    /*
     * gsource_check:
     * @ctx: the AioContext
     *
     * Called by the glib event loop from glib's GSourceFuncs->check() after
     * waiting for events.
     *
     * Returns: true when ready to be dispatched.
     */
    bool (*gsource_check)(AioContext *ctx);

    /*
     * gsource_dispatch:
     * @ctx: the AioContext
     * @ready_list: list for handlers that become ready
     *
     * Place ready AioHandlers on ready_list.  Called as part of the glib event
     * loop from glib's GSourceFuncs->dispatch().
     *
     * Called with list_lock incremented.
     */
    void (*gsource_dispatch)(AioContext *ctx, AioHandlerList *ready_list);

#ifdef CONFIG_LINUX_IO_URING
    /**
     * add_sqe: Add an io_uring sqe for submission.
     * @prep_sqe: invoked with an sqe that should be prepared for submission
     * @opaque: user-defined argument to @prep_sqe()
     * @cqe_handler: the unique cqe handler associated with this request
     *
     * The caller's @prep_sqe() function is invoked to fill in the details of
     * the sqe.  Do not call io_uring_sqe_set_data() on this sqe.
     *
     * The kernel may see the sqe as soon as @prep_sqe() returns or it may take
     * until the next event loop iteration.
     *
     * This function is called from the current AioContext and is not
     * thread-safe.
     */
    void (*add_sqe)(AioContext *ctx,
                    void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
                    void *opaque, CqeHandler *cqe_handler);
#endif /* CONFIG_LINUX_IO_URING */
} FDMonOps;
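
/*
 * A minimal sketch of an FDMonOps implementation (hypothetical; the real
 * implementations live elsewhere in the tree).  Only update, wait, and
 * need_wait are shown; my_fdmon_update() and my_fdmon_wait() are assumed
 * names:
 *
 *     static void my_fdmon_update(AioContext *ctx, AioHandler *old_node,
 *                                 AioHandler *new_node)
 *     {
 *         ...adjust the set of fds monitored by this backend...
 *     }
 *
 *     static int my_fdmon_wait(AioContext *ctx, AioHandlerList *ready_list,
 *                              int64_t timeout)
 *     {
 *         ...block for up to timeout ns, fill ready_list, return the count...
 *     }
 *
 *     static const FDMonOps fdmon_my_ops = {
 *         .update = my_fdmon_update,
 *         .wait = my_fdmon_wait,
 *         .need_wait = aio_poll_disabled,
 *     };
 *
 * Using aio_poll_disabled for .need_wait follows the guidance above for
 * backends that cannot poll fd readiness from userspace.
 */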

/*
 * Each aio_bh_poll() call carves off a slice of the BH list, so that newly
 * scheduled BHs are not processed until the next aio_bh_poll() call.  All
 * active aio_bh_poll() calls chain their slices together in a list, so that
 * nested aio_bh_poll() calls process all scheduled bottom halves.
 */
typedef QSLIST_HEAD(, QEMUBH) BHList;
typedef struct BHListSlice BHListSlice;
struct BHListSlice {
    BHList bh_list;
    QSIMPLEQ_ENTRY(BHListSlice) next;
};

typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;

typedef struct AioPolledEvent {
    int64_t ns;        /* current polling time in nanoseconds */
} AioPolledEvent;

struct AioContext {
    GSource source;

    /* Used by AioContext users to protect from multi-threaded access. */
    QemuRecMutex lock;

    /*
     * Keep track of readers and writers of the block layer graph.
     * This is essential to avoid performing additions and removals
     * of nodes and edges from the block graph while some
     * other thread is traversing it.
     */
    BdrvGraphRWlock *bdrv_graph;

    /* The list of registered AIO handlers.  Protected by ctx->list_lock. */
    AioHandlerList aio_handlers;

    /* The list of AIO handlers to be deleted.  Protected by ctx->list_lock. */
    AioHandlerList deleted_aio_handlers;

    /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
     * only written from the AioContext home thread, or under the BQL in
     * the case of the main AioContext.  However, it is read from any
     * thread so it is still accessed with atomic primitives.
     *
     * If this field is 0, everything (file descriptors, bottom halves,
     * timers) will be re-evaluated before the next blocking poll() or
     * io_uring wait; therefore, the event_notifier_set call can be
     * skipped.  If it is non-zero, you may need to wake up a concurrent
     * aio_poll or the glib main event loop, making event_notifier_set
     * necessary.
     *
     * Bit 0 is reserved for GSource usage of the AioContext, and is 1
     * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
     * Bits 1-31 simply count the number of active calls to aio_poll
     * that are in the prepare or poll phase.
     *
     * The GSource and aio_poll must use a different mechanism because
     * there is no certainty that a call to GSource's prepare callback
     * (via g_main_context_prepare) is indeed followed by check and
     * dispatch.  It's not clear whether this would be a bug, but let's
     * play safe and allow it---it will just cause extra calls to
     * event_notifier_set until the next call to dispatch.
     *
     * Instead, the aio_poll calls include both the prepare and the
     * dispatch phase, hence a simple counter is enough for them.
     */
    uint32_t notify_me;

    /* A lock to protect between QEMUBH and AioHandler adders and deleters,
     * and to ensure that no callbacks are removed while we're walking and
     * dispatching them.
     */
    QemuLockCnt list_lock;

    /* Bottom Halves pending aio_bh_poll() processing */
    BHList bh_list;

    /* Chained BH list slices for each nested aio_bh_poll() call */
    QSIMPLEQ_HEAD(, BHListSlice) bh_slice_list;

    /* Used by aio_notify.
     *
     * "notified" is used to avoid expensive event_notifier_test_and_clear
     * calls.  When it is clear, the EventNotifier is clear, or one thread
     * is going to clear "notified" before processing more events.  False
     * positives are possible, i.e. "notified" could be set even though the
     * EventNotifier is clear.
     *
     * Note that event_notifier_set *cannot* be optimized the same way.  For
     * more information on the problem that would result, see "#ifdef BUG2"
     * in the docs/aio_notify_accept.promela formal model.
     */
    bool notified;
    EventNotifier notifier;

    QSLIST_HEAD(, Coroutine) scheduled_coroutines;
    QEMUBH *co_schedule_bh;

    int thread_pool_min;
    int thread_pool_max;
    /* Thread pool for performing work and receiving completion callbacks.
     * Has its own locking.
     */
    struct ThreadPoolAio *thread_pool;

#ifdef CONFIG_LINUX_AIO
    struct LinuxAioState *linux_aio;
#endif
#ifdef CONFIG_LINUX_IO_URING
    /* State for file descriptor monitoring using Linux io_uring */
    struct io_uring fdmon_io_uring;
    AioHandlerSList submit_list;
    void *io_uring_fd_tag;

    /* Pending callback state for cqe handlers */
    CqeHandlerSimpleQ cqe_handler_ready_list;
#endif /* CONFIG_LINUX_IO_URING */

    /* TimerLists for calling timers - one per clock type.  Has its own
     * locking.
     */
    QEMUTimerListGroup tlg;

    /* Number of AioHandlers without .io_poll() */
    int poll_disable_cnt;

    /* Polling mode parameters */
    int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
    int64_t poll_grow;      /* polling time growth factor */
    int64_t poll_shrink;    /* polling time shrink factor */

    /* AIO engine parameters */
    int64_t aio_max_batch;  /* maximum number of requests in a batch */

    /*
     * List of handlers participating in userspace polling.  Protected by
     * ctx->list_lock.  Iterated and modified mostly by the event loop thread
     * from aio_poll() with ctx->list_lock incremented.  aio_set_fd_handler()
     * only touches the list to delete nodes if ctx->list_lock's count is zero.
     */
    AioHandlerList poll_aio_handlers;

    /* Are we in polling mode or monitoring file descriptors? */
    bool poll_started;

    /* epoll(7) state used when built with CONFIG_EPOLL */
    int epollfd;

    /* The GSource unix fd tag for epollfd */
    void *epollfd_tag;

    const FDMonOps *fdmon_ops;

    /* Was aio_context_new() successful? */
    bool initialized;
};
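
/*
 * The lock field above is a QemuRecMutex taken by AioContext users around
 * multi-threaded access.  A minimal sketch, assuming the caller already
 * holds a reference to ctx:
 *
 *     qemu_rec_mutex_lock(&ctx->lock);
 *     ...access state shared with other users of this AioContext...
 *     qemu_rec_mutex_unlock(&ctx->lock);
 */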

/**
 * aio_context_new: Allocate a new AioContext.
 *
 * An AioContext provides a mini event-loop that can be waited on
 * synchronously.  It also provides bottom halves, a service to execute
 * a piece of code as soon as possible.
 */
AioContext *aio_context_new(Error **errp);

/**
 * aio_context_ref:
 * @ctx: The AioContext to operate on.
 *
 * Add a reference to an AioContext.
 */
void aio_context_ref(AioContext *ctx);

/**
 * aio_context_unref:
 * @ctx: The AioContext to operate on.
 *
 * Drop a reference to an AioContext.
 */
void aio_context_unref(AioContext *ctx);

/**
 * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
 * run only once and as soon as possible.
 *
 * @name: A human-readable identifier for debugging purposes.
 */
void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
                                  const char *name);

/**
 * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
 * only once and as soon as possible.
 *
 * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the
 * name string.
 */
#define aio_bh_schedule_oneshot(ctx, cb, opaque) \
    aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb)))
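
/*
 * A minimal oneshot bottom half sketch; my_oneshot_cb() and my_state are
 * hypothetical names.  The callback runs once in ctx's event loop and the
 * caller does not manage a QEMUBH:
 *
 *     static void my_oneshot_cb(void *opaque)
 *     {
 *         ...runs once, as soon as possible, in ctx's event loop...
 *     }
 *
 *     aio_bh_schedule_oneshot(ctx, my_oneshot_cb, my_state);
 */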

/**
 * aio_bh_new_full: Allocate a new bottom half structure.
 *
 * Bottom halves are lightweight callbacks whose invocation is guaranteed
 * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
 * is opaque and must be allocated prior to its use.
 *
 * @name: A human-readable identifier for debugging purposes.
 * @reentrancy_guard: A guard set when entering a cb to prevent
 * device-reentrancy issues
 */
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
                        const char *name, MemReentrancyGuard *reentrancy_guard);

/**
 * aio_bh_new: Allocate a new bottom half structure
 *
 * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
 * string.
 */
#define aio_bh_new(ctx, cb, opaque) \
    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)

/**
 * aio_bh_new_guarded: Allocate a new bottom half structure with a
 * reentrancy_guard
 *
 * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
 * string.
 */
#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)

/**
 * aio_notify: Force processing of pending events.
 *
 * Similar to signaling a condition variable, aio_notify forces
 * aio_poll to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_poll again very soon,
 * or go through another iteration of the GLib main loop.  Hence, aio_notify
 * also has the side effect of recalculating the sets of file descriptors
 * that the main loop waits for.
 *
 * Calling aio_notify is rarely necessary, because for example scheduling
 * a bottom half calls it already.
 */
void aio_notify(AioContext *ctx);

/**
 * aio_notify_accept: Acknowledge receiving an aio_notify.
 *
 * aio_notify() uses an EventNotifier in order to wake up a sleeping
 * aio_poll() or g_main_context_iteration().  Calls to aio_notify() are
 * usually rare, but the AioContext has to clear the EventNotifier on
 * every aio_poll() or g_main_context_iteration() in order to avoid
 * busy waiting.  This event_notifier_test_and_clear() cannot be done
 * using the usual aio_context_set_event_notifier(), because it must
 * be done before processing all events (file descriptors, bottom halves,
 * timers).
 *
 * aio_notify_accept() is an optimized event_notifier_test_and_clear()
 * that is specific to an AioContext's notifier; it is used internally
 * to clear the EventNotifier only if aio_notify() had been called.
 */
void aio_notify_accept(AioContext *ctx);

/**
 * aio_bh_call: Executes callback function of the specified BH.
 */
void aio_bh_call(QEMUBH *bh);

/**
 * aio_bh_poll: Poll bottom halves for an AioContext.
 *
 * This is an internal function used by the QEMU main loop.  Note that
 * multiple occurrences of aio_bh_poll() must not run concurrently.
 */
int aio_bh_poll(AioContext *ctx);

/**
 * qemu_bh_schedule: Schedule a bottom half.
 *
 * Scheduling a bottom half interrupts the main loop and causes the
 * execution of the callback that was passed to qemu_bh_new.
 *
 * Bottom halves that are scheduled from a bottom half handler are instantly
 * invoked.  This can create an infinite loop if a bottom half handler
 * schedules itself.
 *
 * @bh: The bottom half to be scheduled.
 */
void qemu_bh_schedule(QEMUBH *bh);
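
/*
 * A minimal reusable bottom half sketch; my_bh_cb() and s are hypothetical
 * names.  Unlike the oneshot variant, the QEMUBH persists and can be
 * rescheduled until qemu_bh_delete() is called:
 *
 *     static void my_bh_cb(void *opaque)
 *     {
 *         ...runs in ctx's event loop each time the BH is scheduled...
 *     }
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_bh_cb, s);
 *     qemu_bh_schedule(bh);
 *     ...later, when the BH is no longer needed...
 *     qemu_bh_delete(bh);
 */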

/**
 * qemu_bh_cancel: Cancel execution of a bottom half.
 *
 * Canceling execution of a bottom half undoes the effect of calls to
 * qemu_bh_schedule without freeing its resources yet.  While cancellation
 * itself is also wait-free and thread-safe, it can of course race with the
 * loop that executes bottom halves unless you are holding the iothread
 * mutex.  This makes it mostly useless if you are not holding the mutex.
 *
 * @bh: The bottom half to be canceled.
 */
void qemu_bh_cancel(QEMUBH *bh);

/**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
 *
 * Deleting a bottom half frees the memory that was allocated for it by
 * qemu_bh_new.  It also implies canceling the bottom half if it was
 * scheduled.
 * This function is asynchronous: the bottom half is only freed once the
 * event loop has finished with it.
 *
 * @bh: The bottom half to be deleted.
 */
void qemu_bh_delete(QEMUBH *bh);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, before g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_prepare(AioContext *ctx);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, after g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_pending(AioContext *ctx);

/* Dispatch any pending callbacks from the GSource attached to the AioContext.
 *
 * This is used internally in the implementation of the GSource.
 */
void aio_dispatch(AioContext *ctx);

/* Make progress in completing AIO work.  This can issue new pending
 * aio as a result of executing I/O completion or bh callbacks.
 *
 * Return whether any progress was made by executing AIO or bottom half
 * handlers.  If @blocking == true, this should always be true except
 * if someone called aio_notify.
 *
 * If there are no pending bottom halves, but there are pending AIO
 * operations, it may not be possible to make any progress without
 * blocking.  If @blocking is true, this function will wait until one
 * or more AIO events have completed, to ensure something has moved
 * before returning.
 */
bool no_coroutine_fn aio_poll(AioContext *ctx, bool blocking);

/* Register a file descriptor and associated callbacks.  Behaves very similarly
 * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
 * be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of qemu_set_fd_handler.
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque);
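
/*
 * A minimal sketch of registering and unregistering a read handler; my_read()
 * and s are hypothetical names.  Passing NULL for all callbacks removes the
 * handler:
 *
 *     static void my_read(void *opaque)
 *     {
 *         ...fd is readable, consume the data...
 *     }
 *
 *     aio_set_fd_handler(ctx, fd, my_read, NULL, NULL, NULL, s);
 *     ...
 *     aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
 */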

/* Register an event notifier and associated callbacks.  Behaves very similarly
 * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these
 * callbacks will be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of event_notifier_set_handler.
 */
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready);

/*
 * Set polling begin/end callbacks for an event notifier that has already been
 * registered with aio_set_event_notifier.  Do nothing if the event notifier is
 * not registered.
 *
 * Note that if the io_poll_end() callback (or the entire notifier) is removed
 * during polling, it will not be called, so an io_poll_begin() is not
 * necessarily always followed by an io_poll_end().
 */
void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end);

/* Return a GSource that lets the main loop poll the file descriptors attached
 * to this AioContext.
 */
GSource *aio_get_g_source(AioContext *ctx);

/* Return the ThreadPoolAio bound to this AioContext */
struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx);

/* Set up the LinuxAioState bound to this AioContext */
struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);

/* Return the LinuxAioState bound to this AioContext */
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);

/**
 * aio_timer_new_with_attrs:
 * @ctx: the aio context
 * @type: the clock type
 * @scale: the scale
 * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
 *              to assign
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Allocate a new timer (with attributes) attached to the context @ctx.
 * The function is responsible for memory allocation.
 *
 * The preferred interface is aio_timer_init or aio_timer_init_with_attrs.
 * Use that unless you really need dynamic memory allocation.
 *
 * Returns: a pointer to the new timer
 */
static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx,
                                                  QEMUClockType type,
                                                  int scale, int attributes,
                                                  QEMUTimerCB *cb, void *opaque)
{
    return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque);
}

/**
 * aio_timer_new:
 * @ctx: the aio context
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Allocate a new timer attached to the context @ctx.
 * See aio_timer_new_with_attrs for details.
 *
 * Returns: a pointer to the new timer
 */
static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
                                       int scale,
                                       QEMUTimerCB *cb, void *opaque)
{
    return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque);
}
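
/*
 * A minimal timer sketch; my_timer_cb() and s are hypothetical names.  The
 * callback fires in ctx's event loop once the deadline passes:
 *
 *     static void my_timer_cb(void *opaque)
 *     {
 *         ...timer expired...
 *     }
 *
 *     QEMUTimer *t = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_MS,
 *                                  my_timer_cb, s);
 *     timer_mod(t, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 100);
 */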

/**
 * aio_timer_init_with_attrs:
 * @ctx: the aio context
 * @ts: the timer
 * @type: the clock type
 * @scale: the scale
 * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
 *              to assign
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Initialise a new timer (with attributes) attached to the context @ctx.
 * The caller is responsible for memory allocation.
 */
static inline void aio_timer_init_with_attrs(AioContext *ctx,
                                             QEMUTimer *ts, QEMUClockType type,
                                             int scale, int attributes,
                                             QEMUTimerCB *cb, void *opaque)
{
    timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque);
}

/**
 * aio_timer_init:
 * @ctx: the aio context
 * @ts: the timer
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Initialise a new timer attached to the context @ctx.
 * See aio_timer_init_with_attrs for details.
 */
static inline void aio_timer_init(AioContext *ctx,
                                  QEMUTimer *ts, QEMUClockType type,
                                  int scale,
                                  QEMUTimerCB *cb, void *opaque)
{
    timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque);
}

/**
 * aio_compute_timeout:
 * @ctx: the aio context
 *
 * Compute the timeout that a blocking aio_poll should use.
 */
int64_t aio_compute_timeout(AioContext *ctx);

/**
 * aio_co_schedule:
 * @ctx: the aio context
 * @co: the coroutine
 *
 * Start a coroutine on a remote AioContext.
 *
 * The coroutine must not be entered by anyone else while aio_co_schedule()
 * is active.  In addition the coroutine must have yielded unless ctx
 * is the context in which the coroutine is running (i.e. the value of
 * qemu_get_current_aio_context() from the coroutine itself).
 */
void aio_co_schedule(AioContext *ctx, Coroutine *co);

/**
 * aio_co_reschedule_self:
 * @new_ctx: the new context
 *
 * Move the currently running coroutine to new_ctx.  If the coroutine is
 * already running in new_ctx, do nothing.
 *
 * Note that this function cannot reschedule from iohandler_ctx to
 * qemu_aio_context.
 */
void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);

/**
 * aio_co_wake:
 * @co: the coroutine
 *
 * Restart a coroutine on the AioContext where it was running last, thus
 * preventing coroutines from jumping from one context to another when they
 * go to sleep.
 *
 * aio_co_wake may be executed either in coroutine or non-coroutine
 * context.  The coroutine must not be entered by anyone else while
 * aio_co_wake() is active.
 *
 * If `co`'s AioContext differs from the current AioContext, this will call
 * aio_co_schedule(), which makes this safe to use even when `co` has not
 * yielded yet.  In such a case, it will be entered once it yields.
 *
 * In contrast, if `co`'s AioContext is equal to the current one, it is
 * required for `co` to currently be yielding.  This is generally the case
 * if the caller is not in `co` (i.e. invoked by `co`), because the only
 * other way for the caller to be running then is for `co` to currently be
 * yielding.
 *
 * Therefore, if there is no way for the caller to be invoked/entered by
 * `co`, it is generally safe to call this regardless of whether `co` is
 * known to already be yielding or not -- it only has to yield at some
 * point.
 */
void aio_co_wake(Coroutine *co);
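
/*
 * A minimal sketch of the yield/wake pattern; my_co_fn() and s are
 * hypothetical names:
 *
 *     static void coroutine_fn my_co_fn(void *opaque)
 *     {
 *         ...
 *         qemu_coroutine_yield();
 *         ...resumes here after aio_co_wake()...
 *     }
 *
 *     Coroutine *co = qemu_coroutine_create(my_co_fn, s);
 *     aio_co_schedule(ctx, co);
 *     ...once the coroutine has yielded, from any context...
 *     aio_co_wake(co);
 */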

/**
 * aio_co_enter:
 * @ctx: the context to run the coroutine
 * @co: the coroutine to run
 *
 * Enter a coroutine in the specified AioContext.
 */
void aio_co_enter(AioContext *ctx, Coroutine *co);

/**
 * Return the AioContext whose event loop runs in the current thread.
 *
 * If called from an IOThread this will be the IOThread's AioContext.  If
 * called from the main thread or with the "big QEMU lock" taken it
 * will be the main loop AioContext.
 *
 * Note that the return value is never the main loop's iohandler_ctx; in
 * that case the main loop AioContext is returned instead.
 */
AioContext *qemu_get_current_aio_context(void);

void qemu_set_current_aio_context(AioContext *ctx);

/**
 * aio_context_setup:
 * @ctx: the aio context
 * @errp: error pointer
 *
 * Initialize the aio context.
 *
 * Returns: true on success, false otherwise
 */
bool aio_context_setup(AioContext *ctx, Error **errp);

/**
 * aio_context_destroy:
 * @ctx: the aio context
 *
 * Destroy the aio context.
 */
void aio_context_destroy(AioContext *ctx);

/**
 * aio_context_set_poll_params:
 * @ctx: the aio context
 * @max_ns: how long to busy poll for, in nanoseconds
 * @grow: polling time growth factor
 * @shrink: polling time shrink factor
 *
 * Poll mode can be disabled by setting poll_max_ns to 0.
 */
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink,
                                 Error **errp);

/**
 * aio_context_set_aio_params:
 * @ctx: the aio context
 * @max_batch: maximum number of requests in a batch, 0 means that the
 *             engine will use its default
 */
void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch);

/**
 * aio_context_set_thread_pool_params:
 * @ctx: the aio context
 * @min: min number of threads to have readily available in the thread pool
 * @max: max number of threads the thread pool can contain
 */
void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
                                        int64_t max, Error **errp);

#ifdef CONFIG_LINUX_IO_URING
/**
 * aio_has_io_uring: Return whether io_uring is available.
 *
 * io_uring is either available in all AioContexts or in none, so this only
 * needs to be called once from within any thread's AioContext.
 */
static inline bool aio_has_io_uring(void)
{
    AioContext *ctx = qemu_get_current_aio_context();
    return ctx->fdmon_ops->add_sqe;
}

/**
 * aio_add_sqe: Add an io_uring sqe for submission.
 * @prep_sqe: invoked with an sqe that should be prepared for submission
 * @opaque: user-defined argument to @prep_sqe()
 * @cqe_handler: the unique cqe handler associated with this request
 *
 * The caller's @prep_sqe() function is invoked to fill in the details of the
 * sqe.  Do not call io_uring_sqe_set_data() on this sqe.
 *
 * The sqe is submitted by the current AioContext.  The kernel may see the sqe
 * as soon as @prep_sqe() returns or it may take until the next event loop
 * iteration.
 *
 * When the AioContext is destroyed, pending sqes are ignored and their
 * CqeHandlers are not invoked.
 *
 * This function must be called only when aio_has_io_uring() returns true.
 */
void aio_add_sqe(void (*prep_sqe)(struct io_uring_sqe *sqe, void *opaque),
                 void *opaque, CqeHandler *cqe_handler);
#endif /* CONFIG_LINUX_IO_URING */

#endif