/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
 * libblkio BlockDriver
 *
 * Copyright Red Hat, Inc.
 *
 * Author:
 *   Stefan Hajnoczi <stefanha@redhat.com>
 */

#include "qemu/osdep.h"
#include <blkio.h>
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "qemu/module.h"

/*
 * Keep the QEMU BlockDriver names identical to the libblkio driver names.
 * Using macros instead of typing out the string literals avoids typos.
 */
#define DRIVER_IO_URING "io_uring"
#define DRIVER_NVME_IO_URING "nvme-io_uring"
#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"

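/*
 * Illustrative usage (not from this file): a node backed by one of these
 * drivers is created with the usual --blockdev syntax, e.g.:
 *
 *   --blockdev driver=io_uring,node-name=drv0,filename=test.img
 *   --blockdev driver=virtio-blk-vhost-vdpa,node-name=drv0,\
 *       path=/dev/vhost-vdpa-0,cache.direct=on
 *
 * The "filename" and "path" options are consumed by the per-driver open
 * functions below; the vhost and nvme-io_uring drivers additionally require
 * cache.direct=on.
 */
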
/*
 * Allocated bounce buffers are kept in a list sorted by buffer address.
 */
typedef struct BlkioBounceBuf {
    QLIST_ENTRY(BlkioBounceBuf) next;

    /* The bounce buffer */
    struct iovec buf;
} BlkioBounceBuf;

typedef struct {
    /*
     * libblkio is not thread-safe so this lock protects ->blkio and
     * ->blkioq.
     */
    QemuMutex blkio_lock;
    struct blkio *blkio;
    struct blkioq *blkioq; /* make this multi-queue in the future... */
    int completion_fd;

    /*
     * Polling fetches the next completion into this field.
     *
     * No lock is necessary since only one thread calls aio_poll() and invokes
     * fd and poll handlers.
     */
    struct blkio_completion poll_completion;

    /*
     * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
     *
     * Lock ordering: ->bounce_lock before ->blkio_lock.
     */
    CoMutex bounce_lock;

    /* Bounce buffer pool */
    struct blkio_mem_region bounce_pool;

    /* Sorted list of allocated bounce buffers */
    QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;

    /* Queue for coroutines waiting for bounce buffer space */
    CoQueue bounce_available;

    /*
     * The value of the "mem-region-alignment" property, stored as uint64_t
     * to match blkio_get_uint64()
     */
    uint64_t mem_region_alignment;

    /*
     * Do I/O request buffers need to be registered as blkio_mem_regions
     * before use? When true, guest buffers cannot be passed to libblkio
     * directly and the bounce buffer pool above is used instead.
     */
    bool needs_mem_regions;
} BDRVBlkioState;

/* Called with s->bounce_lock held */
static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
{
    /* There can be no allocated bounce buffers during resize */
    assert(QLIST_EMPTY(&s->bounce_bufs));

    /* Pad size to reduce frequency of resize calls */
    bytes += 128 * 1024;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        int ret;

        if (s->bounce_pool.addr) {
            blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
            blkio_free_mem_region(s->blkio, &s->bounce_pool);
            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
        }

        /* Automatically freed when s->blkio is destroyed */
        ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
        if (ret < 0) {
            return ret;
        }

        ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
        if (ret < 0) {
            blkio_free_mem_region(s->blkio, &s->bounce_pool);
            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
            return ret;
        }
    }

    return 0;
}

/* Called with s->bounce_lock held */
static bool
blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
                             int64_t bytes)
{
    void *addr = s->bounce_pool.addr;
    BlkioBounceBuf *cur = NULL;
    BlkioBounceBuf *prev = NULL;
    ptrdiff_t space;

    /*
     * This is just a linear search over the holes between requests. An
     * efficient allocator would be nice.
     */
    QLIST_FOREACH(cur, &s->bounce_bufs, next) {
        space = cur->buf.iov_base - addr;
        if (bytes <= space) {
            QLIST_INSERT_BEFORE(cur, bounce, next);
            bounce->buf.iov_base = addr;
            bounce->buf.iov_len = bytes;
            return true;
        }

        addr = cur->buf.iov_base + cur->buf.iov_len;
        prev = cur;
    }

    /* Is there space after the last request? */
    space = s->bounce_pool.addr + s->bounce_pool.len - addr;
    if (bytes > space) {
        return false;
    }
    if (prev) {
        QLIST_INSERT_AFTER(prev, bounce, next);
    } else {
        QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
    }
    bounce->buf.iov_base = addr;
    bounce->buf.iov_len = bytes;
    return true;
}
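
/*
 * Illustration (not part of the original code): the allocator above is
 * first-fit over the sorted list. With buffers A and B already allocated,
 * the loop checks the hole before A, then the hole between A and B; the
 * code after the loop checks the tail:
 *
 *   bounce_pool: | hole |--A--| hole |--B--|      tail      |
 *                  ^addr starts at the pool base and advances past each
 *                   allocated buffer
 */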

static int coroutine_fn
blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
                          int64_t bytes)
{
    /*
     * Ensure fairness: first time around we join the back of the queue,
     * subsequently we join the front so we don't lose our place.
     */
    CoQueueWaitFlags wait_flags = 0;

    QEMU_LOCK_GUARD(&s->bounce_lock);

    /* Ensure fairness: don't even try if other requests are already waiting */
    if (!qemu_co_queue_empty(&s->bounce_available)) {
        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
                                 wait_flags);
        wait_flags = CO_QUEUE_WAIT_FRONT;
    }

    while (true) {
        if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
            /* Kick the next queued request since there may be space */
            qemu_co_queue_next(&s->bounce_available);
            return 0;
        }

        /*
         * If there are no in-flight requests then the pool was simply too
         * small.
         */
        if (QLIST_EMPTY(&s->bounce_bufs)) {
            bool ok;
            int ret;

            ret = blkio_resize_bounce_pool(s, bytes);
            if (ret < 0) {
                /* Kick the next queued request since that may fail too */
                qemu_co_queue_next(&s->bounce_available);
                return ret;
            }

            ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
            assert(ok); /* must have space this time */
            return 0;
        }

        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
                                 wait_flags);
        wait_flags = CO_QUEUE_WAIT_FRONT;
    }
}

static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
                                                  BlkioBounceBuf *bounce)
{
    QEMU_LOCK_GUARD(&s->bounce_lock);

    QLIST_REMOVE(bounce, next);

    /* Wake up waiting coroutines since space may now be available */
    qemu_co_queue_next(&s->bounce_available);
}

/* For async to .bdrv_co_*() conversion */
typedef struct {
    Coroutine *coroutine;
    int ret;
} BlkioCoData;
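
/*
 * Request lifecycle (a summary of the code below): each coroutine request
 * places a BlkioCoData on its stack, passes it to libblkio as user_data,
 * submits the request, and yields. A completion handler then copies
 * completion.ret into cod->ret and resumes the coroutine with aio_co_wake(),
 * at which point the .bdrv_co_*() function returns cod.ret.
 */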

static void blkio_completion_fd_read(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVBlkioState *s = bs->opaque;
    uint64_t val;
    int ret;

    /* Polling may have already fetched a completion */
    if (s->poll_completion.user_data != NULL) {
        BlkioCoData *cod = s->poll_completion.user_data;
        cod->ret = s->poll_completion.ret;

        /* Clear it in case aio_co_wake() enters a nested event loop */
        s->poll_completion.user_data = NULL;

        aio_co_wake(cod->coroutine);
    }

    /* Reset completion fd status */
    ret = read(s->completion_fd, &val, sizeof(val));

    /* Ignore errors, there's nothing we can do */
    (void)ret;

    /*
     * Reading one completion at a time makes nested event loop re-entrancy
     * simple. Change this loop to get multiple completions in one go if it
     * becomes a performance bottleneck.
     */
    while (true) {
        struct blkio_completion completion;

        WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
            ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
        }
        if (ret != 1) {
            break;
        }

        BlkioCoData *cod = completion.user_data;
        cod->ret = completion.ret;
        aio_co_wake(cod->coroutine);
    }
}

static bool blkio_completion_fd_poll(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVBlkioState *s = bs->opaque;
    int ret;

    /* Just in case we already fetched a completion */
    if (s->poll_completion.user_data != NULL) {
        return true;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
    }
    return ret == 1;
}

static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}

static void blkio_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(new_context,
                       s->completion_fd,
                       false,
                       blkio_completion_fd_read,
                       NULL,
                       blkio_completion_fd_poll,
                       blkio_completion_fd_poll_ready,
                       bs);
}

static void blkio_detach_aio_context(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(bdrv_get_aio_context(bs),
                       s->completion_fd,
                       false, NULL, NULL, NULL, NULL, NULL);
}

/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
static void blkio_submit_io(BlockDriverState *bs)
{
    if (qatomic_read(&bs->io_plugged) == 0) {
        BDRVBlkioState *s = bs->opaque;

        blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
    }
}
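
/*
 * Batching (behavior implied by the code, not a libblkio API guarantee):
 * while bs->io_plugged is non-zero, blkio_submit_io() is a nop and newly
 * enqueued requests accumulate in the blkioq. The final bdrv_io_unplug()
 * call lands in blkio_io_unplug() below, which submits the whole batch with
 * a single blkioq_do_io() call.
 */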

static int coroutine_fn
blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static int coroutine_fn
blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
                QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    BDRVBlkioState *s = bs->opaque;
    bool use_bounce_buffer = s->needs_mem_regions;
    BlkioBounceBuf bounce;
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;

    if (use_bounce_buffer) {
        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
        if (ret < 0) {
            return ret;
        }

        iov = &bounce.buf;
        iovcnt = 1;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();

    if (use_bounce_buffer) {
        if (cod.ret == 0) {
            qemu_iovec_from_buf(qiov, 0,
                                bounce.buf.iov_base,
                                bounce.buf.iov_len);
        }

        blkio_free_bounce_buffer(s, &bounce);
    }

    return cod.ret;
}

static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
        int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    BDRVBlkioState *s = bs->opaque;
    bool use_bounce_buffer = s->needs_mem_regions;
    BlkioBounceBuf bounce;
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;

    if (use_bounce_buffer) {
        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
        if (ret < 0) {
            return ret;
        }

        qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
        iov = &bounce.buf;
        iovcnt = 1;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();

    if (use_bounce_buffer) {
        blkio_free_bounce_buffer(s, &bounce);
    }

    return cod.ret;
}

static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_flush(s->blkioq, &cod, 0);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    BDRVBlkioState *s = bs->opaque;
    BlkioCoData cod = {
        .coroutine = qemu_coroutine_self(),
    };
    uint32_t blkio_flags = 0;

    if (flags & BDRV_REQ_FUA) {
        blkio_flags |= BLKIO_REQ_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
        blkio_flags |= BLKIO_REQ_NO_UNMAP;
    }
    if (flags & BDRV_REQ_NO_FALLBACK) {
        blkio_flags |= BLKIO_REQ_NO_FALLBACK;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
        blkio_submit_io(bs);
    }

    qemu_coroutine_yield();
    return cod.ret;
}

static void blkio_io_unplug(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkio_submit_io(bs);
    }
}

static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
                               Error **errp)
{
    const char *filename = qdict_get_str(options, "filename");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_set_str(s->blkio, "path", filename);
    qdict_del(options, "filename");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (flags & BDRV_O_NOCACHE) {
        ret = blkio_set_bool(s->blkio, "direct", true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "failed to set direct: %s",
                             blkio_get_error_msg());
            return ret;
        }
    }

    return 0;
}

static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
                               Error **errp)
{
    const char *filename = qdict_get_str(options, "filename");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_set_str(s->blkio, "path", filename);
    qdict_del(options, "filename");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (!(flags & BDRV_O_NOCACHE)) {
        error_setg(errp, "cache.direct=off is not supported");
        return -EINVAL;
    }

    return 0;
}

static int blkio_virtio_blk_common_open(BlockDriverState *bs,
        QDict *options, int flags, Error **errp)
{
    const char *path = qdict_get_try_str(options, "path");
    BDRVBlkioState *s = bs->opaque;
    int ret;

    if (!path) {
        error_setg(errp, "missing 'path' option");
        return -EINVAL;
    }

    ret = blkio_set_str(s->blkio, "path", path);
    qdict_del(options, "path");
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to set path: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (!(flags & BDRV_O_NOCACHE)) {
        error_setg(errp, "cache.direct=off is not supported");
        return -EINVAL;
    }
    return 0;
}

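/*
 * blkio_file_open() below follows the libblkio instance lifecycle; see the
 * libblkio documentation for the authoritative description. Roughly: an
 * instance is created, configured through properties ("path", "direct",
 * "read-only", ...), connected with blkio_connect(), after which properties
 * like "needs-mem-regions" become readable, and finally started with
 * blkio_start(), after which its queues can be used.
 */
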
static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp)
{
    const char *blkio_driver = bs->drv->protocol_name;
    BDRVBlkioState *s = bs->opaque;
    int ret;

    ret = blkio_create(blkio_driver, &s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_create failed: %s",
                         blkio_get_error_msg());
        return ret;
    }

    if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
        ret = blkio_io_uring_open(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
        ret = blkio_nvme_io_uring(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
        ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
    } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
        ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
    } else {
        g_assert_not_reached();
    }
    if (ret < 0) {
        blkio_destroy(&s->blkio);
        return ret;
    }

    if (!(flags & BDRV_O_RDWR)) {
        ret = blkio_set_bool(s->blkio, "read-only", true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "failed to set read-only: %s",
                             blkio_get_error_msg());
            blkio_destroy(&s->blkio);
            return ret;
        }
    }

    ret = blkio_connect(s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_connect failed: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_get_bool(s->blkio,
                         "needs-mem-regions",
                         &s->needs_mem_regions);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get needs-mem-regions: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_get_uint64(s->blkio,
                           "mem-region-alignment",
                           &s->mem_region_alignment);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get mem-region-alignment: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    ret = blkio_start(s->blkio);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "blkio_start failed: %s",
                         blkio_get_error_msg());
        blkio_destroy(&s->blkio);
        return ret;
    }

    bs->supported_write_flags = BDRV_REQ_FUA;
    bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
                               BDRV_REQ_NO_FALLBACK;

    qemu_mutex_init(&s->blkio_lock);
    qemu_co_mutex_init(&s->bounce_lock);
    qemu_co_queue_init(&s->bounce_available);
    QLIST_INIT(&s->bounce_bufs);
    s->blkioq = blkio_get_queue(s->blkio, 0);
    s->completion_fd = blkioq_get_completion_fd(s->blkioq);

    blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
    return 0;
}

static void blkio_close(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    /* There is no destroy() API for s->bounce_lock */

    qemu_mutex_destroy(&s->blkio_lock);
    blkio_detach_aio_context(bs);
    blkio_destroy(&s->blkio);
}

static int64_t blkio_getlength(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    uint64_t capacity;
    int ret;

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
    }
    if (ret < 0) {
        /* blkio_get_uint64() returns a negative errno; propagate it */
        return ret;
    }

    return capacity;
}

static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    return 0;
}

static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BDRVBlkioState *s = bs->opaque;
    QEMU_LOCK_GUARD(&s->blkio_lock);
    int value;
    int ret;

    ret = blkio_get_int(s->blkio, "request-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.request_alignment = value;
    if (bs->bl.request_alignment < 1 ||
        bs->bl.request_alignment >= INT_MAX ||
        !is_power_of_2(bs->bl.request_alignment)) {
        error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
                   "must be a power of 2 less than INT_MAX",
                   bs->bl.request_alignment);
        return;
    }

    ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.opt_transfer = value;
    if (bs->bl.opt_transfer > INT_MAX ||
        (bs->bl.opt_transfer % bs->bl.request_alignment)) {
        error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
                   "be a multiple of %" PRIu32, bs->bl.opt_transfer,
                   bs->bl.request_alignment);
        return;
    }

    ret = blkio_get_int(s->blkio, "max-transfer", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
                         blkio_get_error_msg());
        return;
    }
    bs->bl.max_transfer = value;
    if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
        (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
        error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
                   "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
                   bs->bl.max_transfer, bs->bl.request_alignment,
                   bs->bl.opt_transfer);
        return;
    }

    ret = blkio_get_int(s->blkio, "buf-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
                   "positive", value);
        return;
    }
    bs->bl.min_mem_alignment = value;

    ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "failed to get \"optimal-buf-alignment\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
                   "must be positive", value);
        return;
    }
    bs->bl.opt_mem_alignment = value;

    ret = blkio_get_int(s->blkio, "max-segments", &value);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
                         blkio_get_error_msg());
        return;
    }
    if (value < 1) {
        error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
                   value);
        return;
    }
    bs->bl.max_iov = value;
}

/*
 * TODO
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */

#define BLKIO_DRIVER(name, ...) \
    { \
        .format_name             = name, \
        .protocol_name           = name, \
        .instance_size           = sizeof(BDRVBlkioState), \
        .bdrv_file_open          = blkio_file_open, \
        .bdrv_close              = blkio_close, \
        .bdrv_getlength          = blkio_getlength, \
        .bdrv_get_info           = blkio_get_info, \
        .bdrv_attach_aio_context = blkio_attach_aio_context, \
        .bdrv_detach_aio_context = blkio_detach_aio_context, \
        .bdrv_co_pdiscard        = blkio_co_pdiscard, \
        .bdrv_co_preadv          = blkio_co_preadv, \
        .bdrv_co_pwritev         = blkio_co_pwritev, \
        .bdrv_co_flush_to_disk   = blkio_co_flush, \
        .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
        .bdrv_io_unplug          = blkio_io_unplug, \
        .bdrv_refresh_limits     = blkio_refresh_limits, \
        __VA_ARGS__ \
    }

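/*
 * One QEMU BlockDriver is registered per libblkio driver, so the libblkio
 * driver name doubles as the QEMU protocol name. Only .bdrv_needs_filename
 * differs: io_uring and nvme-io_uring take a filename, while the vhost
 * drivers take a "path" option instead.
 */
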
static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
    DRIVER_IO_URING,
    .bdrv_needs_filename = true,
);

static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
    DRIVER_NVME_IO_URING,
    .bdrv_needs_filename = true,
);

static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
    DRIVER_VIRTIO_BLK_VHOST_USER
);

static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
    DRIVER_VIRTIO_BLK_VHOST_VDPA
);

static void bdrv_blkio_init(void)
{
    bdrv_register(&bdrv_io_uring);
    bdrv_register(&bdrv_nvme_io_uring);
    bdrv_register(&bdrv_virtio_blk_vhost_user);
    bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
}

block_init(bdrv_blkio_init);