1fd66dbd4SStefan Hajnoczi /* SPDX-License-Identifier: LGPL-2.1-or-later */
2fd66dbd4SStefan Hajnoczi /*
3fd66dbd4SStefan Hajnoczi * libblkio BlockDriver
4fd66dbd4SStefan Hajnoczi *
5fd66dbd4SStefan Hajnoczi * Copyright Red Hat, Inc.
6fd66dbd4SStefan Hajnoczi *
7fd66dbd4SStefan Hajnoczi * Author:
8fd66dbd4SStefan Hajnoczi * Stefan Hajnoczi <stefanha@redhat.com>
9fd66dbd4SStefan Hajnoczi */
10fd66dbd4SStefan Hajnoczi
11fd66dbd4SStefan Hajnoczi #include "qemu/osdep.h"
12fd66dbd4SStefan Hajnoczi #include <blkio.h>
13fd66dbd4SStefan Hajnoczi #include "block/block_int.h"
14c5640b3eSStefan Hajnoczi #include "exec/memory.h"
15c5640b3eSStefan Hajnoczi #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
16433fcea4SStefan Hajnoczi #include "qemu/defer-call.h"
17fd66dbd4SStefan Hajnoczi #include "qapi/error.h"
18c5640b3eSStefan Hajnoczi #include "qemu/error-report.h"
19fd66dbd4SStefan Hajnoczi #include "qapi/qmp/qdict.h"
20fd66dbd4SStefan Hajnoczi #include "qemu/module.h"
2128ff7b4dSStefan Hajnoczi #include "sysemu/block-backend.h"
22c5640b3eSStefan Hajnoczi #include "exec/memory.h" /* for ram_block_discard_disable() */
23fd66dbd4SStefan Hajnoczi
244f01a9bbSPeter Krempa #include "block/block-io.h"
254f01a9bbSPeter Krempa
26fd66dbd4SStefan Hajnoczi /*
27fd66dbd4SStefan Hajnoczi * Allocated bounce buffers are kept in a list sorted by buffer address.
28fd66dbd4SStefan Hajnoczi */
29fd66dbd4SStefan Hajnoczi typedef struct BlkioBounceBuf {
30fd66dbd4SStefan Hajnoczi QLIST_ENTRY(BlkioBounceBuf) next;
31fd66dbd4SStefan Hajnoczi
32fd66dbd4SStefan Hajnoczi /* The bounce buffer */
33fd66dbd4SStefan Hajnoczi struct iovec buf;
34fd66dbd4SStefan Hajnoczi } BlkioBounceBuf;
35fd66dbd4SStefan Hajnoczi
36fd66dbd4SStefan Hajnoczi typedef struct {
37fd66dbd4SStefan Hajnoczi /*
38fd66dbd4SStefan Hajnoczi * libblkio is not thread-safe so this lock protects ->blkio and
39fd66dbd4SStefan Hajnoczi * ->blkioq.
40fd66dbd4SStefan Hajnoczi */
41fd66dbd4SStefan Hajnoczi QemuMutex blkio_lock;
42fd66dbd4SStefan Hajnoczi struct blkio *blkio;
43fd66dbd4SStefan Hajnoczi struct blkioq *blkioq; /* make this multi-queue in the future... */
44fd66dbd4SStefan Hajnoczi int completion_fd;
45fd66dbd4SStefan Hajnoczi
46fd66dbd4SStefan Hajnoczi /*
47fd66dbd4SStefan Hajnoczi * Polling fetches the next completion into this field.
48fd66dbd4SStefan Hajnoczi *
49fd66dbd4SStefan Hajnoczi * No lock is necessary since only one thread calls aio_poll() and invokes
50fd66dbd4SStefan Hajnoczi * fd and poll handlers.
51fd66dbd4SStefan Hajnoczi */
52fd66dbd4SStefan Hajnoczi struct blkio_completion poll_completion;
53fd66dbd4SStefan Hajnoczi
54fd66dbd4SStefan Hajnoczi /*
55fd66dbd4SStefan Hajnoczi * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
56fd66dbd4SStefan Hajnoczi *
57fd66dbd4SStefan Hajnoczi * Lock ordering: ->bounce_lock before ->blkio_lock.
58fd66dbd4SStefan Hajnoczi */
59fd66dbd4SStefan Hajnoczi CoMutex bounce_lock;
60fd66dbd4SStefan Hajnoczi
61fd66dbd4SStefan Hajnoczi /* Bounce buffer pool */
62fd66dbd4SStefan Hajnoczi struct blkio_mem_region bounce_pool;
63fd66dbd4SStefan Hajnoczi
64fd66dbd4SStefan Hajnoczi /* Sorted list of allocated bounce buffers */
65fd66dbd4SStefan Hajnoczi QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
66fd66dbd4SStefan Hajnoczi
67fd66dbd4SStefan Hajnoczi /* Queue for coroutines waiting for bounce buffer space */
68fd66dbd4SStefan Hajnoczi CoQueue bounce_available;
69fd66dbd4SStefan Hajnoczi
70fd66dbd4SStefan Hajnoczi /* The value of the "mem-region-alignment" property */
71615eaeabSRichard W.M. Jones uint64_t mem_region_alignment;
72fd66dbd4SStefan Hajnoczi
73fd66dbd4SStefan Hajnoczi /* Can we skip adding/deleting blkio_mem_regions? */
74fd66dbd4SStefan Hajnoczi bool needs_mem_regions;
75c5640b3eSStefan Hajnoczi
76c5640b3eSStefan Hajnoczi /* Are file descriptors necessary for blkio_mem_regions? */
77c5640b3eSStefan Hajnoczi bool needs_mem_region_fd;
78c5640b3eSStefan Hajnoczi
79c5640b3eSStefan Hajnoczi /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
80c5640b3eSStefan Hajnoczi bool may_pin_mem_regions;
81fd66dbd4SStefan Hajnoczi } BDRVBlkioState;
82fd66dbd4SStefan Hajnoczi
83fd66dbd4SStefan Hajnoczi /* Called with s->bounce_lock held */
blkio_resize_bounce_pool(BDRVBlkioState * s,int64_t bytes)84fd66dbd4SStefan Hajnoczi static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
85fd66dbd4SStefan Hajnoczi {
86fd66dbd4SStefan Hajnoczi /* There can be no allocated bounce buffers during resize */
87fd66dbd4SStefan Hajnoczi assert(QLIST_EMPTY(&s->bounce_bufs));
88fd66dbd4SStefan Hajnoczi
89fd66dbd4SStefan Hajnoczi /* Pad size to reduce frequency of resize calls */
90fd66dbd4SStefan Hajnoczi bytes += 128 * 1024;
91fd66dbd4SStefan Hajnoczi
9210b2393eSKevin Wolf /* Align the pool size to avoid blkio_alloc_mem_region() failure */
9310b2393eSKevin Wolf bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment);
9410b2393eSKevin Wolf
95fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
96fd66dbd4SStefan Hajnoczi int ret;
97fd66dbd4SStefan Hajnoczi
98fd66dbd4SStefan Hajnoczi if (s->bounce_pool.addr) {
99fd66dbd4SStefan Hajnoczi blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
100fd66dbd4SStefan Hajnoczi blkio_free_mem_region(s->blkio, &s->bounce_pool);
101fd66dbd4SStefan Hajnoczi memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
102fd66dbd4SStefan Hajnoczi }
103fd66dbd4SStefan Hajnoczi
104fd66dbd4SStefan Hajnoczi /* Automatically freed when s->blkio is destroyed */
105fd66dbd4SStefan Hajnoczi ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
106fd66dbd4SStefan Hajnoczi if (ret < 0) {
107fd66dbd4SStefan Hajnoczi return ret;
108fd66dbd4SStefan Hajnoczi }
109fd66dbd4SStefan Hajnoczi
110fd66dbd4SStefan Hajnoczi ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
111fd66dbd4SStefan Hajnoczi if (ret < 0) {
112fd66dbd4SStefan Hajnoczi blkio_free_mem_region(s->blkio, &s->bounce_pool);
113fd66dbd4SStefan Hajnoczi memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
114fd66dbd4SStefan Hajnoczi return ret;
115fd66dbd4SStefan Hajnoczi }
116fd66dbd4SStefan Hajnoczi }
117fd66dbd4SStefan Hajnoczi
118fd66dbd4SStefan Hajnoczi return 0;
119fd66dbd4SStefan Hajnoczi }
120fd66dbd4SStefan Hajnoczi
121fd66dbd4SStefan Hajnoczi /* Called with s->bounce_lock held */
122fd66dbd4SStefan Hajnoczi static bool
blkio_do_alloc_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce,int64_t bytes)123fd66dbd4SStefan Hajnoczi blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
124fd66dbd4SStefan Hajnoczi int64_t bytes)
125fd66dbd4SStefan Hajnoczi {
126fd66dbd4SStefan Hajnoczi void *addr = s->bounce_pool.addr;
127fd66dbd4SStefan Hajnoczi BlkioBounceBuf *cur = NULL;
128fd66dbd4SStefan Hajnoczi BlkioBounceBuf *prev = NULL;
129fd66dbd4SStefan Hajnoczi ptrdiff_t space;
130fd66dbd4SStefan Hajnoczi
131fd66dbd4SStefan Hajnoczi /*
132fd66dbd4SStefan Hajnoczi * This is just a linear search over the holes between requests. An
133fd66dbd4SStefan Hajnoczi * efficient allocator would be nice.
134fd66dbd4SStefan Hajnoczi */
135fd66dbd4SStefan Hajnoczi QLIST_FOREACH(cur, &s->bounce_bufs, next) {
136fd66dbd4SStefan Hajnoczi space = cur->buf.iov_base - addr;
137fd66dbd4SStefan Hajnoczi if (bytes <= space) {
138fd66dbd4SStefan Hajnoczi QLIST_INSERT_BEFORE(cur, bounce, next);
139fd66dbd4SStefan Hajnoczi bounce->buf.iov_base = addr;
140fd66dbd4SStefan Hajnoczi bounce->buf.iov_len = bytes;
141fd66dbd4SStefan Hajnoczi return true;
142fd66dbd4SStefan Hajnoczi }
143fd66dbd4SStefan Hajnoczi
144fd66dbd4SStefan Hajnoczi addr = cur->buf.iov_base + cur->buf.iov_len;
145fd66dbd4SStefan Hajnoczi prev = cur;
146fd66dbd4SStefan Hajnoczi }
147fd66dbd4SStefan Hajnoczi
148fd66dbd4SStefan Hajnoczi /* Is there space after the last request? */
149fd66dbd4SStefan Hajnoczi space = s->bounce_pool.addr + s->bounce_pool.len - addr;
150fd66dbd4SStefan Hajnoczi if (bytes > space) {
151fd66dbd4SStefan Hajnoczi return false;
152fd66dbd4SStefan Hajnoczi }
153fd66dbd4SStefan Hajnoczi if (prev) {
154fd66dbd4SStefan Hajnoczi QLIST_INSERT_AFTER(prev, bounce, next);
155fd66dbd4SStefan Hajnoczi } else {
156fd66dbd4SStefan Hajnoczi QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
157fd66dbd4SStefan Hajnoczi }
158fd66dbd4SStefan Hajnoczi bounce->buf.iov_base = addr;
159fd66dbd4SStefan Hajnoczi bounce->buf.iov_len = bytes;
160fd66dbd4SStefan Hajnoczi return true;
161fd66dbd4SStefan Hajnoczi }
162fd66dbd4SStefan Hajnoczi
163fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_alloc_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce,int64_t bytes)164fd66dbd4SStefan Hajnoczi blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
165fd66dbd4SStefan Hajnoczi int64_t bytes)
166fd66dbd4SStefan Hajnoczi {
167fd66dbd4SStefan Hajnoczi /*
168fd66dbd4SStefan Hajnoczi * Ensure fairness: first time around we join the back of the queue,
169fd66dbd4SStefan Hajnoczi * subsequently we join the front so we don't lose our place.
170fd66dbd4SStefan Hajnoczi */
171fd66dbd4SStefan Hajnoczi CoQueueWaitFlags wait_flags = 0;
172fd66dbd4SStefan Hajnoczi
173fd66dbd4SStefan Hajnoczi QEMU_LOCK_GUARD(&s->bounce_lock);
174fd66dbd4SStefan Hajnoczi
175fd66dbd4SStefan Hajnoczi /* Ensure fairness: don't even try if other requests are already waiting */
176fd66dbd4SStefan Hajnoczi if (!qemu_co_queue_empty(&s->bounce_available)) {
177fd66dbd4SStefan Hajnoczi qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
178fd66dbd4SStefan Hajnoczi wait_flags);
179fd66dbd4SStefan Hajnoczi wait_flags = CO_QUEUE_WAIT_FRONT;
180fd66dbd4SStefan Hajnoczi }
181fd66dbd4SStefan Hajnoczi
182fd66dbd4SStefan Hajnoczi while (true) {
183fd66dbd4SStefan Hajnoczi if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
184fd66dbd4SStefan Hajnoczi /* Kick the next queued request since there may be space */
185fd66dbd4SStefan Hajnoczi qemu_co_queue_next(&s->bounce_available);
186fd66dbd4SStefan Hajnoczi return 0;
187fd66dbd4SStefan Hajnoczi }
188fd66dbd4SStefan Hajnoczi
189fd66dbd4SStefan Hajnoczi /*
190fd66dbd4SStefan Hajnoczi * If there are no in-flight requests then the pool was simply too
191fd66dbd4SStefan Hajnoczi * small.
192fd66dbd4SStefan Hajnoczi */
193fd66dbd4SStefan Hajnoczi if (QLIST_EMPTY(&s->bounce_bufs)) {
194fd66dbd4SStefan Hajnoczi bool ok;
195fd66dbd4SStefan Hajnoczi int ret;
196fd66dbd4SStefan Hajnoczi
197fd66dbd4SStefan Hajnoczi ret = blkio_resize_bounce_pool(s, bytes);
198fd66dbd4SStefan Hajnoczi if (ret < 0) {
199fd66dbd4SStefan Hajnoczi /* Kick the next queued request since that may fail too */
200fd66dbd4SStefan Hajnoczi qemu_co_queue_next(&s->bounce_available);
201fd66dbd4SStefan Hajnoczi return ret;
202fd66dbd4SStefan Hajnoczi }
203fd66dbd4SStefan Hajnoczi
204fd66dbd4SStefan Hajnoczi ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
205fd66dbd4SStefan Hajnoczi assert(ok); /* must have space this time */
206fd66dbd4SStefan Hajnoczi return 0;
207fd66dbd4SStefan Hajnoczi }
208fd66dbd4SStefan Hajnoczi
209fd66dbd4SStefan Hajnoczi qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
210fd66dbd4SStefan Hajnoczi wait_flags);
211fd66dbd4SStefan Hajnoczi wait_flags = CO_QUEUE_WAIT_FRONT;
212fd66dbd4SStefan Hajnoczi }
213fd66dbd4SStefan Hajnoczi }
214fd66dbd4SStefan Hajnoczi
blkio_free_bounce_buffer(BDRVBlkioState * s,BlkioBounceBuf * bounce)215fd66dbd4SStefan Hajnoczi static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
216fd66dbd4SStefan Hajnoczi BlkioBounceBuf *bounce)
217fd66dbd4SStefan Hajnoczi {
218fd66dbd4SStefan Hajnoczi QEMU_LOCK_GUARD(&s->bounce_lock);
219fd66dbd4SStefan Hajnoczi
220fd66dbd4SStefan Hajnoczi QLIST_REMOVE(bounce, next);
221fd66dbd4SStefan Hajnoczi
222fd66dbd4SStefan Hajnoczi /* Wake up waiting coroutines since space may now be available */
223fd66dbd4SStefan Hajnoczi qemu_co_queue_next(&s->bounce_available);
224fd66dbd4SStefan Hajnoczi }
225fd66dbd4SStefan Hajnoczi
226fd66dbd4SStefan Hajnoczi /* For async to .bdrv_co_*() conversion */
227fd66dbd4SStefan Hajnoczi typedef struct {
228fd66dbd4SStefan Hajnoczi Coroutine *coroutine;
229fd66dbd4SStefan Hajnoczi int ret;
230fd66dbd4SStefan Hajnoczi } BlkioCoData;
231fd66dbd4SStefan Hajnoczi
blkio_completion_fd_read(void * opaque)232fd66dbd4SStefan Hajnoczi static void blkio_completion_fd_read(void *opaque)
233fd66dbd4SStefan Hajnoczi {
234fd66dbd4SStefan Hajnoczi BlockDriverState *bs = opaque;
235fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
236fd66dbd4SStefan Hajnoczi uint64_t val;
237fd66dbd4SStefan Hajnoczi int ret;
238fd66dbd4SStefan Hajnoczi
239fd66dbd4SStefan Hajnoczi /* Polling may have already fetched a completion */
240fd66dbd4SStefan Hajnoczi if (s->poll_completion.user_data != NULL) {
241fd66dbd4SStefan Hajnoczi BlkioCoData *cod = s->poll_completion.user_data;
242fd66dbd4SStefan Hajnoczi cod->ret = s->poll_completion.ret;
243fd66dbd4SStefan Hajnoczi
244fd66dbd4SStefan Hajnoczi /* Clear it in case aio_co_wake() enters a nested event loop */
245fd66dbd4SStefan Hajnoczi s->poll_completion.user_data = NULL;
246fd66dbd4SStefan Hajnoczi
247fd66dbd4SStefan Hajnoczi aio_co_wake(cod->coroutine);
248fd66dbd4SStefan Hajnoczi }
249fd66dbd4SStefan Hajnoczi
250fd66dbd4SStefan Hajnoczi /* Reset completion fd status */
251fd66dbd4SStefan Hajnoczi ret = read(s->completion_fd, &val, sizeof(val));
252fd66dbd4SStefan Hajnoczi
253fd66dbd4SStefan Hajnoczi /* Ignore errors, there's nothing we can do */
254fd66dbd4SStefan Hajnoczi (void)ret;
255fd66dbd4SStefan Hajnoczi
256fd66dbd4SStefan Hajnoczi /*
257fd66dbd4SStefan Hajnoczi * Reading one completion at a time makes nested event loop re-entrancy
258fd66dbd4SStefan Hajnoczi * simple. Change this loop to get multiple completions in one go if it
259fd66dbd4SStefan Hajnoczi * becomes a performance bottleneck.
260fd66dbd4SStefan Hajnoczi */
261fd66dbd4SStefan Hajnoczi while (true) {
262fd66dbd4SStefan Hajnoczi struct blkio_completion completion;
263fd66dbd4SStefan Hajnoczi
264fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
265fd66dbd4SStefan Hajnoczi ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
266fd66dbd4SStefan Hajnoczi }
267fd66dbd4SStefan Hajnoczi if (ret != 1) {
268fd66dbd4SStefan Hajnoczi break;
269fd66dbd4SStefan Hajnoczi }
270fd66dbd4SStefan Hajnoczi
271fd66dbd4SStefan Hajnoczi BlkioCoData *cod = completion.user_data;
272fd66dbd4SStefan Hajnoczi cod->ret = completion.ret;
273fd66dbd4SStefan Hajnoczi aio_co_wake(cod->coroutine);
274fd66dbd4SStefan Hajnoczi }
275fd66dbd4SStefan Hajnoczi }
276fd66dbd4SStefan Hajnoczi
blkio_completion_fd_poll(void * opaque)277fd66dbd4SStefan Hajnoczi static bool blkio_completion_fd_poll(void *opaque)
278fd66dbd4SStefan Hajnoczi {
279fd66dbd4SStefan Hajnoczi BlockDriverState *bs = opaque;
280fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
281fd66dbd4SStefan Hajnoczi int ret;
282fd66dbd4SStefan Hajnoczi
283fd66dbd4SStefan Hajnoczi /* Just in case we already fetched a completion */
284fd66dbd4SStefan Hajnoczi if (s->poll_completion.user_data != NULL) {
285fd66dbd4SStefan Hajnoczi return true;
286fd66dbd4SStefan Hajnoczi }
287fd66dbd4SStefan Hajnoczi
288fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
289fd66dbd4SStefan Hajnoczi ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
290fd66dbd4SStefan Hajnoczi }
291fd66dbd4SStefan Hajnoczi return ret == 1;
292fd66dbd4SStefan Hajnoczi }
293fd66dbd4SStefan Hajnoczi
/*
 * Poll-ready handler: processing is the same as for a readable completion fd,
 * so defer to blkio_completion_fd_read().
 */
static void blkio_completion_fd_poll_ready(void *opaque)
{
    blkio_completion_fd_read(opaque);
}
298fd66dbd4SStefan Hajnoczi
/*
 * Register the completion fd's read, poll and poll-ready handlers with
 * @new_context. The handlers receive @bs as their opaque argument.
 */
static void blkio_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    BDRVBlkioState *s = bs->opaque;

    aio_set_fd_handler(new_context,
                       s->completion_fd,
                       blkio_completion_fd_read,
                       NULL,
                       blkio_completion_fd_poll,
                       blkio_completion_fd_poll_ready,
                       bs);
}
309fd66dbd4SStefan Hajnoczi
/*
 * Remove the completion fd's handlers from the AioContext that @bs currently
 * belongs to by setting all of them to NULL.
 */
static void blkio_detach_aio_context(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_set_fd_handler(ctx, s->completion_fd, NULL, NULL, NULL, NULL, NULL);
}
317fd66dbd4SStefan Hajnoczi
31828ff7b4dSStefan Hajnoczi /*
319ccee48aaSStefan Hajnoczi * Called by defer_call_end() or immediately if not in a deferred section.
320ccee48aaSStefan Hajnoczi * Called without blkio_lock.
32128ff7b4dSStefan Hajnoczi */
blkio_deferred_fn(void * opaque)322ccee48aaSStefan Hajnoczi static void blkio_deferred_fn(void *opaque)
323fd66dbd4SStefan Hajnoczi {
32428ff7b4dSStefan Hajnoczi BDRVBlkioState *s = opaque;
325fd66dbd4SStefan Hajnoczi
32628ff7b4dSStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
327fd66dbd4SStefan Hajnoczi blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
328fd66dbd4SStefan Hajnoczi }
329fd66dbd4SStefan Hajnoczi }
330fd66dbd4SStefan Hajnoczi
33128ff7b4dSStefan Hajnoczi /*
33228ff7b4dSStefan Hajnoczi * Schedule I/O submission after enqueuing a new request. Called without
33328ff7b4dSStefan Hajnoczi * blkio_lock.
33428ff7b4dSStefan Hajnoczi */
blkio_submit_io(BlockDriverState * bs)33528ff7b4dSStefan Hajnoczi static void blkio_submit_io(BlockDriverState *bs)
33628ff7b4dSStefan Hajnoczi {
33728ff7b4dSStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
33828ff7b4dSStefan Hajnoczi
339ccee48aaSStefan Hajnoczi defer_call(blkio_deferred_fn, s);
34028ff7b4dSStefan Hajnoczi }
34128ff7b4dSStefan Hajnoczi
342fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_co_pdiscard(BlockDriverState * bs,int64_t offset,int64_t bytes)343fd66dbd4SStefan Hajnoczi blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
344fd66dbd4SStefan Hajnoczi {
345fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
346fd66dbd4SStefan Hajnoczi BlkioCoData cod = {
347fd66dbd4SStefan Hajnoczi .coroutine = qemu_coroutine_self(),
348fd66dbd4SStefan Hajnoczi };
349fd66dbd4SStefan Hajnoczi
350fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
351fd66dbd4SStefan Hajnoczi blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
352fd66dbd4SStefan Hajnoczi }
353fd66dbd4SStefan Hajnoczi
35428ff7b4dSStefan Hajnoczi blkio_submit_io(bs);
355fd66dbd4SStefan Hajnoczi qemu_coroutine_yield();
356fd66dbd4SStefan Hajnoczi return cod.ret;
357fd66dbd4SStefan Hajnoczi }
358fd66dbd4SStefan Hajnoczi
359fd66dbd4SStefan Hajnoczi static int coroutine_fn
blkio_co_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)360fd66dbd4SStefan Hajnoczi blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
361fd66dbd4SStefan Hajnoczi QEMUIOVector *qiov, BdrvRequestFlags flags)
362fd66dbd4SStefan Hajnoczi {
363fd66dbd4SStefan Hajnoczi BlkioCoData cod = {
364fd66dbd4SStefan Hajnoczi .coroutine = qemu_coroutine_self(),
365fd66dbd4SStefan Hajnoczi };
366fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
367c5640b3eSStefan Hajnoczi bool use_bounce_buffer =
368c5640b3eSStefan Hajnoczi s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
369fd66dbd4SStefan Hajnoczi BlkioBounceBuf bounce;
370fd66dbd4SStefan Hajnoczi struct iovec *iov = qiov->iov;
371fd66dbd4SStefan Hajnoczi int iovcnt = qiov->niov;
372fd66dbd4SStefan Hajnoczi
373fd66dbd4SStefan Hajnoczi if (use_bounce_buffer) {
374fd66dbd4SStefan Hajnoczi int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
375fd66dbd4SStefan Hajnoczi if (ret < 0) {
376fd66dbd4SStefan Hajnoczi return ret;
377fd66dbd4SStefan Hajnoczi }
378fd66dbd4SStefan Hajnoczi
379fd66dbd4SStefan Hajnoczi iov = &bounce.buf;
380fd66dbd4SStefan Hajnoczi iovcnt = 1;
381fd66dbd4SStefan Hajnoczi }
382fd66dbd4SStefan Hajnoczi
383fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
384fd66dbd4SStefan Hajnoczi blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
385fd66dbd4SStefan Hajnoczi }
386fd66dbd4SStefan Hajnoczi
38728ff7b4dSStefan Hajnoczi blkio_submit_io(bs);
388fd66dbd4SStefan Hajnoczi qemu_coroutine_yield();
389fd66dbd4SStefan Hajnoczi
390fd66dbd4SStefan Hajnoczi if (use_bounce_buffer) {
391fd66dbd4SStefan Hajnoczi if (cod.ret == 0) {
392fd66dbd4SStefan Hajnoczi qemu_iovec_from_buf(qiov, 0,
393fd66dbd4SStefan Hajnoczi bounce.buf.iov_base,
394fd66dbd4SStefan Hajnoczi bounce.buf.iov_len);
395fd66dbd4SStefan Hajnoczi }
396fd66dbd4SStefan Hajnoczi
397fd66dbd4SStefan Hajnoczi blkio_free_bounce_buffer(s, &bounce);
398fd66dbd4SStefan Hajnoczi }
399fd66dbd4SStefan Hajnoczi
400fd66dbd4SStefan Hajnoczi return cod.ret;
401fd66dbd4SStefan Hajnoczi }
402fd66dbd4SStefan Hajnoczi
blkio_co_pwritev(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)403fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
404fd66dbd4SStefan Hajnoczi int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
405fd66dbd4SStefan Hajnoczi {
406fd66dbd4SStefan Hajnoczi uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
407fd66dbd4SStefan Hajnoczi BlkioCoData cod = {
408fd66dbd4SStefan Hajnoczi .coroutine = qemu_coroutine_self(),
409fd66dbd4SStefan Hajnoczi };
410fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
411c5640b3eSStefan Hajnoczi bool use_bounce_buffer =
412c5640b3eSStefan Hajnoczi s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
413fd66dbd4SStefan Hajnoczi BlkioBounceBuf bounce;
414fd66dbd4SStefan Hajnoczi struct iovec *iov = qiov->iov;
415fd66dbd4SStefan Hajnoczi int iovcnt = qiov->niov;
416fd66dbd4SStefan Hajnoczi
417fd66dbd4SStefan Hajnoczi if (use_bounce_buffer) {
418fd66dbd4SStefan Hajnoczi int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
419fd66dbd4SStefan Hajnoczi if (ret < 0) {
420fd66dbd4SStefan Hajnoczi return ret;
421fd66dbd4SStefan Hajnoczi }
422fd66dbd4SStefan Hajnoczi
423fd66dbd4SStefan Hajnoczi qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
424fd66dbd4SStefan Hajnoczi iov = &bounce.buf;
425fd66dbd4SStefan Hajnoczi iovcnt = 1;
426fd66dbd4SStefan Hajnoczi }
427fd66dbd4SStefan Hajnoczi
428fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
429fd66dbd4SStefan Hajnoczi blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
430fd66dbd4SStefan Hajnoczi }
431fd66dbd4SStefan Hajnoczi
43228ff7b4dSStefan Hajnoczi blkio_submit_io(bs);
433fd66dbd4SStefan Hajnoczi qemu_coroutine_yield();
434fd66dbd4SStefan Hajnoczi
435fd66dbd4SStefan Hajnoczi if (use_bounce_buffer) {
436fd66dbd4SStefan Hajnoczi blkio_free_bounce_buffer(s, &bounce);
437fd66dbd4SStefan Hajnoczi }
438fd66dbd4SStefan Hajnoczi
439fd66dbd4SStefan Hajnoczi return cod.ret;
440fd66dbd4SStefan Hajnoczi }
441fd66dbd4SStefan Hajnoczi
blkio_co_flush(BlockDriverState * bs)442fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
443fd66dbd4SStefan Hajnoczi {
444fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
445fd66dbd4SStefan Hajnoczi BlkioCoData cod = {
446fd66dbd4SStefan Hajnoczi .coroutine = qemu_coroutine_self(),
447fd66dbd4SStefan Hajnoczi };
448fd66dbd4SStefan Hajnoczi
449fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
450fd66dbd4SStefan Hajnoczi blkioq_flush(s->blkioq, &cod, 0);
451fd66dbd4SStefan Hajnoczi }
452fd66dbd4SStefan Hajnoczi
45328ff7b4dSStefan Hajnoczi blkio_submit_io(bs);
454fd66dbd4SStefan Hajnoczi qemu_coroutine_yield();
455fd66dbd4SStefan Hajnoczi return cod.ret;
456fd66dbd4SStefan Hajnoczi }
457fd66dbd4SStefan Hajnoczi
blkio_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)458fd66dbd4SStefan Hajnoczi static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
459fd66dbd4SStefan Hajnoczi int64_t offset, int64_t bytes, BdrvRequestFlags flags)
460fd66dbd4SStefan Hajnoczi {
461fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
462fd66dbd4SStefan Hajnoczi BlkioCoData cod = {
463fd66dbd4SStefan Hajnoczi .coroutine = qemu_coroutine_self(),
464fd66dbd4SStefan Hajnoczi };
465fd66dbd4SStefan Hajnoczi uint32_t blkio_flags = 0;
466fd66dbd4SStefan Hajnoczi
467fd66dbd4SStefan Hajnoczi if (flags & BDRV_REQ_FUA) {
468fd66dbd4SStefan Hajnoczi blkio_flags |= BLKIO_REQ_FUA;
469fd66dbd4SStefan Hajnoczi }
470fd66dbd4SStefan Hajnoczi if (!(flags & BDRV_REQ_MAY_UNMAP)) {
471fd66dbd4SStefan Hajnoczi blkio_flags |= BLKIO_REQ_NO_UNMAP;
472fd66dbd4SStefan Hajnoczi }
473fd66dbd4SStefan Hajnoczi if (flags & BDRV_REQ_NO_FALLBACK) {
474fd66dbd4SStefan Hajnoczi blkio_flags |= BLKIO_REQ_NO_FALLBACK;
475fd66dbd4SStefan Hajnoczi }
476fd66dbd4SStefan Hajnoczi
477fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
478fd66dbd4SStefan Hajnoczi blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
479fd66dbd4SStefan Hajnoczi }
480fd66dbd4SStefan Hajnoczi
48128ff7b4dSStefan Hajnoczi blkio_submit_io(bs);
482fd66dbd4SStefan Hajnoczi qemu_coroutine_yield();
483fd66dbd4SStefan Hajnoczi return cod.ret;
484fd66dbd4SStefan Hajnoczi }
485fd66dbd4SStefan Hajnoczi
/* Outcome of blkio_mem_region_from_host() */
typedef enum {
    BMRR_OK,    /* the region was filled in */
    BMRR_SKIP,  /* no region was produced; the buffer should be ignored */
    BMRR_FAIL,  /* the buffer is unusable; an error has been set */
} BlkioMemRegionResult;
491c5640b3eSStefan Hajnoczi
492c5640b3eSStefan Hajnoczi /*
493c5640b3eSStefan Hajnoczi * Produce a struct blkio_mem_region for a given address and size.
494c5640b3eSStefan Hajnoczi *
495c5640b3eSStefan Hajnoczi * This function produces identical results when called multiple times with the
496c5640b3eSStefan Hajnoczi * same arguments. This property is necessary because blkio_unmap_mem_region()
497c5640b3eSStefan Hajnoczi * must receive the same struct blkio_mem_region field values that were passed
498c5640b3eSStefan Hajnoczi * to blkio_map_mem_region().
499c5640b3eSStefan Hajnoczi */
500c5640b3eSStefan Hajnoczi static BlkioMemRegionResult
blkio_mem_region_from_host(BlockDriverState * bs,void * host,size_t size,struct blkio_mem_region * region,Error ** errp)501c5640b3eSStefan Hajnoczi blkio_mem_region_from_host(BlockDriverState *bs,
502c5640b3eSStefan Hajnoczi void *host, size_t size,
503c5640b3eSStefan Hajnoczi struct blkio_mem_region *region,
504c5640b3eSStefan Hajnoczi Error **errp)
505c5640b3eSStefan Hajnoczi {
506c5640b3eSStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
507c5640b3eSStefan Hajnoczi int fd = -1;
508c5640b3eSStefan Hajnoczi ram_addr_t fd_offset = 0;
509c5640b3eSStefan Hajnoczi
510c5640b3eSStefan Hajnoczi if (((uintptr_t)host | size) % s->mem_region_alignment) {
511c5640b3eSStefan Hajnoczi error_setg(errp, "unaligned buf %p with size %zu", host, size);
512c5640b3eSStefan Hajnoczi return BMRR_FAIL;
513c5640b3eSStefan Hajnoczi }
514c5640b3eSStefan Hajnoczi
515c5640b3eSStefan Hajnoczi /* Attempt to find the fd for the underlying memory */
516c5640b3eSStefan Hajnoczi if (s->needs_mem_region_fd) {
517c5640b3eSStefan Hajnoczi RAMBlock *ram_block;
518c5640b3eSStefan Hajnoczi RAMBlock *end_block;
519c5640b3eSStefan Hajnoczi ram_addr_t offset;
520c5640b3eSStefan Hajnoczi
521c5640b3eSStefan Hajnoczi /*
522c5640b3eSStefan Hajnoczi * bdrv_register_buf() is called with the BQL held so mr lives at least
523c5640b3eSStefan Hajnoczi * until this function returns.
524c5640b3eSStefan Hajnoczi */
525c5640b3eSStefan Hajnoczi ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
526c5640b3eSStefan Hajnoczi if (ram_block) {
527c5640b3eSStefan Hajnoczi fd = qemu_ram_get_fd(ram_block);
528c5640b3eSStefan Hajnoczi }
529c5640b3eSStefan Hajnoczi if (fd == -1) {
530c5640b3eSStefan Hajnoczi /*
531c5640b3eSStefan Hajnoczi * Ideally every RAMBlock would have an fd. pc-bios and other
532c5640b3eSStefan Hajnoczi * things don't. Luckily they are usually not I/O buffers and we
533c5640b3eSStefan Hajnoczi * can just ignore them.
534c5640b3eSStefan Hajnoczi */
535c5640b3eSStefan Hajnoczi return BMRR_SKIP;
536c5640b3eSStefan Hajnoczi }
537c5640b3eSStefan Hajnoczi
538c5640b3eSStefan Hajnoczi /* Make sure the fd covers the entire range */
539c5640b3eSStefan Hajnoczi end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
540c5640b3eSStefan Hajnoczi if (ram_block != end_block) {
541c5640b3eSStefan Hajnoczi error_setg(errp, "registered buffer at %p with size %zu extends "
542c5640b3eSStefan Hajnoczi "beyond RAMBlock", host, size);
543c5640b3eSStefan Hajnoczi return BMRR_FAIL;
544c5640b3eSStefan Hajnoczi }
545c5640b3eSStefan Hajnoczi }
546c5640b3eSStefan Hajnoczi
547c5640b3eSStefan Hajnoczi *region = (struct blkio_mem_region){
548c5640b3eSStefan Hajnoczi .addr = host,
549c5640b3eSStefan Hajnoczi .len = size,
550c5640b3eSStefan Hajnoczi .fd = fd,
551c5640b3eSStefan Hajnoczi .fd_offset = fd_offset,
552c5640b3eSStefan Hajnoczi };
553c5640b3eSStefan Hajnoczi return BMRR_OK;
554c5640b3eSStefan Hajnoczi }
555c5640b3eSStefan Hajnoczi
blkio_register_buf(BlockDriverState * bs,void * host,size_t size,Error ** errp)556c5640b3eSStefan Hajnoczi static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
557c5640b3eSStefan Hajnoczi Error **errp)
558c5640b3eSStefan Hajnoczi {
559c5640b3eSStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
560c5640b3eSStefan Hajnoczi struct blkio_mem_region region;
561c5640b3eSStefan Hajnoczi BlkioMemRegionResult region_result;
562c5640b3eSStefan Hajnoczi int ret;
563c5640b3eSStefan Hajnoczi
564c5640b3eSStefan Hajnoczi /*
565c5640b3eSStefan Hajnoczi * Mapping memory regions conflicts with RAM discard (virtio-mem) when
566c5640b3eSStefan Hajnoczi * there is pinning, so only do it when necessary.
567c5640b3eSStefan Hajnoczi */
568c5640b3eSStefan Hajnoczi if (!s->needs_mem_regions && s->may_pin_mem_regions) {
569c5640b3eSStefan Hajnoczi return true;
570c5640b3eSStefan Hajnoczi }
571c5640b3eSStefan Hajnoczi
572c5640b3eSStefan Hajnoczi region_result = blkio_mem_region_from_host(bs, host, size, ®ion, errp);
573c5640b3eSStefan Hajnoczi if (region_result == BMRR_SKIP) {
574c5640b3eSStefan Hajnoczi return true;
575c5640b3eSStefan Hajnoczi } else if (region_result != BMRR_OK) {
576c5640b3eSStefan Hajnoczi return false;
577c5640b3eSStefan Hajnoczi }
578c5640b3eSStefan Hajnoczi
579c5640b3eSStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
580c5640b3eSStefan Hajnoczi ret = blkio_map_mem_region(s->blkio, ®ion);
581c5640b3eSStefan Hajnoczi }
582c5640b3eSStefan Hajnoczi
583c5640b3eSStefan Hajnoczi if (ret < 0) {
584c5640b3eSStefan Hajnoczi error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
585c5640b3eSStefan Hajnoczi host, size, blkio_get_error_msg());
586c5640b3eSStefan Hajnoczi return false;
587c5640b3eSStefan Hajnoczi }
588c5640b3eSStefan Hajnoczi return true;
589c5640b3eSStefan Hajnoczi }
590c5640b3eSStefan Hajnoczi
/*
 * Unmap the blkio memory region for the buffer at @host of length @size.
 *
 * The region is rebuilt with blkio_mem_region_from_host() so that
 * blkio_unmap_mem_region() is given the same field values that
 * blkio_register_buf() passed to blkio_map_mem_region(). Nothing is done in
 * the cases where blkio_register_buf() maps nothing, or when the region
 * cannot be rebuilt.
 */
static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
{
    BDRVBlkioState *s = bs->opaque;
    struct blkio_mem_region region;

    /* See blkio_register_buf() */
    if (!s->needs_mem_regions && s->may_pin_mem_regions) {
        return;
    }

    /* No Error object: a failure here is not reported */
    if (blkio_mem_region_from_host(bs, host, size, &region, NULL) != BMRR_OK) {
        return;
    }

    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
        blkio_unmap_mem_region(s->blkio, &region);
    }
}
609c5640b3eSStefan Hajnoczi
blkio_io_uring_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)61069785d66SStefano Garzarella static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
61169785d66SStefano Garzarella int flags, Error **errp)
612fd66dbd4SStefan Hajnoczi {
613fd66dbd4SStefan Hajnoczi const char *filename = qdict_get_str(options, "filename");
614fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
615fd66dbd4SStefan Hajnoczi int ret;
616fd66dbd4SStefan Hajnoczi
617fd66dbd4SStefan Hajnoczi ret = blkio_set_str(s->blkio, "path", filename);
618fd66dbd4SStefan Hajnoczi qdict_del(options, "filename");
619fd66dbd4SStefan Hajnoczi if (ret < 0) {
620fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to set path: %s",
621fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
622fd66dbd4SStefan Hajnoczi return ret;
623fd66dbd4SStefan Hajnoczi }
624fd66dbd4SStefan Hajnoczi
625fd66dbd4SStefan Hajnoczi if (flags & BDRV_O_NOCACHE) {
626fd66dbd4SStefan Hajnoczi ret = blkio_set_bool(s->blkio, "direct", true);
627fd66dbd4SStefan Hajnoczi if (ret < 0) {
628fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to set direct: %s",
629fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
630fd66dbd4SStefan Hajnoczi return ret;
631fd66dbd4SStefan Hajnoczi }
632fd66dbd4SStefan Hajnoczi }
633fd66dbd4SStefan Hajnoczi
63469785d66SStefano Garzarella ret = blkio_connect(s->blkio);
63569785d66SStefano Garzarella if (ret < 0) {
63669785d66SStefano Garzarella error_setg_errno(errp, -ret, "blkio_connect failed: %s",
63769785d66SStefano Garzarella blkio_get_error_msg());
63869785d66SStefano Garzarella return ret;
63969785d66SStefano Garzarella }
64069785d66SStefano Garzarella
641fd66dbd4SStefan Hajnoczi return 0;
642fd66dbd4SStefan Hajnoczi }
643fd66dbd4SStefan Hajnoczi
blkio_nvme_io_uring_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)64469785d66SStefano Garzarella static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
64569785d66SStefano Garzarella int flags, Error **errp)
646fd66dbd4SStefan Hajnoczi {
6476c32fc0dSAlberto Faria const char *path = qdict_get_try_str(options, "path");
648fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
649fd66dbd4SStefan Hajnoczi int ret;
650fd66dbd4SStefan Hajnoczi
6516c32fc0dSAlberto Faria if (!path) {
6526c32fc0dSAlberto Faria error_setg(errp, "missing 'path' option");
6536c32fc0dSAlberto Faria return -EINVAL;
6546c32fc0dSAlberto Faria }
6556c32fc0dSAlberto Faria
6566c32fc0dSAlberto Faria ret = blkio_set_str(s->blkio, "path", path);
6576c32fc0dSAlberto Faria qdict_del(options, "path");
658fd66dbd4SStefan Hajnoczi if (ret < 0) {
659fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to set path: %s",
660fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
661fd66dbd4SStefan Hajnoczi return ret;
662fd66dbd4SStefan Hajnoczi }
663fd66dbd4SStefan Hajnoczi
664fd66dbd4SStefan Hajnoczi if (!(flags & BDRV_O_NOCACHE)) {
665fd66dbd4SStefan Hajnoczi error_setg(errp, "cache.direct=off is not supported");
666fd66dbd4SStefan Hajnoczi return -EINVAL;
667fd66dbd4SStefan Hajnoczi }
668fd66dbd4SStefan Hajnoczi
66969785d66SStefano Garzarella ret = blkio_connect(s->blkio);
67069785d66SStefano Garzarella if (ret < 0) {
67169785d66SStefano Garzarella error_setg_errno(errp, -ret, "blkio_connect failed: %s",
67269785d66SStefano Garzarella blkio_get_error_msg());
67369785d66SStefano Garzarella return ret;
67469785d66SStefano Garzarella }
67569785d66SStefano Garzarella
676fd66dbd4SStefan Hajnoczi return 0;
677fd66dbd4SStefan Hajnoczi }
678fd66dbd4SStefan Hajnoczi
blkio_virtio_blk_connect(BlockDriverState * bs,QDict * options,int flags,Error ** errp)67969785d66SStefano Garzarella static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
68069785d66SStefano Garzarella int flags, Error **errp)
681fd66dbd4SStefan Hajnoczi {
682fd66dbd4SStefan Hajnoczi const char *path = qdict_get_try_str(options, "path");
683fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
684cad2ccc3SStefano Garzarella bool fd_supported = false;
6850b054b4cSStefano Garzarella int fd = -1, ret;
686fd66dbd4SStefan Hajnoczi
687fd66dbd4SStefan Hajnoczi if (!path) {
688fd66dbd4SStefan Hajnoczi error_setg(errp, "missing 'path' option");
689fd66dbd4SStefan Hajnoczi return -EINVAL;
690fd66dbd4SStefan Hajnoczi }
691fd66dbd4SStefan Hajnoczi
692cad2ccc3SStefano Garzarella if (!(flags & BDRV_O_NOCACHE)) {
693cad2ccc3SStefano Garzarella error_setg(errp, "cache.direct=off is not supported");
694cad2ccc3SStefano Garzarella return -EINVAL;
695cad2ccc3SStefano Garzarella }
696cad2ccc3SStefano Garzarella
6971c38fe69SStefano Garzarella if (blkio_set_int(s->blkio, "fd", -1) == 0) {
698cad2ccc3SStefano Garzarella fd_supported = true;
699cad2ccc3SStefano Garzarella }
700cad2ccc3SStefano Garzarella
701cad2ccc3SStefano Garzarella /*
702cad2ccc3SStefano Garzarella * If the libblkio driver supports fd passing, let's always use qemu_open()
703cad2ccc3SStefano Garzarella * to open the `path`, so we can handle fd passing from the management
704cad2ccc3SStefano Garzarella * layer through the "/dev/fdset/N" special path.
705cad2ccc3SStefano Garzarella */
706cad2ccc3SStefano Garzarella if (fd_supported) {
707a5942c17SStefano Garzarella /*
708a5942c17SStefano Garzarella * `path` can contain the path of a character device
709a5942c17SStefano Garzarella * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
710a5942c17SStefano Garzarella *
711a5942c17SStefano Garzarella * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
712a5942c17SStefano Garzarella * is not set in the open flags, because the exchange of IOCTL commands
713a5942c17SStefano Garzarella * for example will fail.
714a5942c17SStefano Garzarella *
715a5942c17SStefano Garzarella * In order to open the device read-only, we are using the `read-only`
716d656aaa1SPaolo Bonzini * property of the libblkio driver in blkio_open().
717a5942c17SStefano Garzarella */
718723bea27SStefano Garzarella fd = qemu_open(path, O_RDWR, NULL);
719cad2ccc3SStefano Garzarella if (fd < 0) {
7209b06d0d0SStefano Garzarella /*
7219b06d0d0SStefano Garzarella * qemu_open() can fail if the user specifies a path that is not
7229b06d0d0SStefano Garzarella * a file or device, for example in the case of Unix Domain Socket
7239b06d0d0SStefano Garzarella * for the virtio-blk-vhost-user driver. In such cases let's have
7249b06d0d0SStefano Garzarella * libblkio open the path directly.
7259b06d0d0SStefano Garzarella */
726723bea27SStefano Garzarella fd_supported = false;
727723bea27SStefano Garzarella } else {
728cad2ccc3SStefano Garzarella ret = blkio_set_int(s->blkio, "fd", fd);
729cad2ccc3SStefano Garzarella if (ret < 0) {
730723bea27SStefano Garzarella fd_supported = false;
731cad2ccc3SStefano Garzarella qemu_close(fd);
7320b054b4cSStefano Garzarella fd = -1;
733cad2ccc3SStefano Garzarella }
734723bea27SStefano Garzarella }
735723bea27SStefano Garzarella }
736723bea27SStefano Garzarella
737723bea27SStefano Garzarella if (!fd_supported) {
738fd66dbd4SStefan Hajnoczi ret = blkio_set_str(s->blkio, "path", path);
739fd66dbd4SStefan Hajnoczi if (ret < 0) {
740fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to set path: %s",
741fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
742fd66dbd4SStefan Hajnoczi return ret;
743fd66dbd4SStefan Hajnoczi }
744fd66dbd4SStefan Hajnoczi }
745cad2ccc3SStefano Garzarella
74669785d66SStefano Garzarella ret = blkio_connect(s->blkio);
7470b054b4cSStefano Garzarella if (ret < 0 && fd >= 0) {
7480b054b4cSStefano Garzarella /* Failed to give the FD to libblkio, close it */
7490b054b4cSStefano Garzarella qemu_close(fd);
7500b054b4cSStefano Garzarella fd = -1;
7510b054b4cSStefano Garzarella }
7520b054b4cSStefano Garzarella
753809c319fSStefano Garzarella /*
7549b06d0d0SStefano Garzarella * Before https://gitlab.com/libblkio/libblkio/-/merge_requests/208
7559b06d0d0SStefano Garzarella * (libblkio <= v1.3.0), setting the `fd` property is not enough to check
7569b06d0d0SStefano Garzarella * whether the driver supports the `fd` property or not. In that case,
7579b06d0d0SStefano Garzarella * blkio_connect() will fail with -EINVAL.
7589b06d0d0SStefano Garzarella * So let's try calling blkio_connect() again by directly setting `path`
7599b06d0d0SStefano Garzarella * to cover this scenario.
760809c319fSStefano Garzarella */
761809c319fSStefano Garzarella if (fd_supported && ret == -EINVAL) {
762809c319fSStefano Garzarella /*
763809c319fSStefano Garzarella * We need to clear the `fd` property we set previously by setting
764809c319fSStefano Garzarella * it to -1.
765809c319fSStefano Garzarella */
766809c319fSStefano Garzarella ret = blkio_set_int(s->blkio, "fd", -1);
767809c319fSStefano Garzarella if (ret < 0) {
768809c319fSStefano Garzarella error_setg_errno(errp, -ret, "failed to set fd: %s",
769809c319fSStefano Garzarella blkio_get_error_msg());
770809c319fSStefano Garzarella return ret;
771809c319fSStefano Garzarella }
772809c319fSStefano Garzarella
773809c319fSStefano Garzarella ret = blkio_set_str(s->blkio, "path", path);
774809c319fSStefano Garzarella if (ret < 0) {
775809c319fSStefano Garzarella error_setg_errno(errp, -ret, "failed to set path: %s",
776809c319fSStefano Garzarella blkio_get_error_msg());
777809c319fSStefano Garzarella return ret;
778809c319fSStefano Garzarella }
779809c319fSStefano Garzarella
780809c319fSStefano Garzarella ret = blkio_connect(s->blkio);
781809c319fSStefano Garzarella }
782809c319fSStefano Garzarella
78369785d66SStefano Garzarella if (ret < 0) {
78469785d66SStefano Garzarella error_setg_errno(errp, -ret, "blkio_connect failed: %s",
78569785d66SStefano Garzarella blkio_get_error_msg());
78669785d66SStefano Garzarella return ret;
78769785d66SStefano Garzarella }
78869785d66SStefano Garzarella
789cad2ccc3SStefano Garzarella qdict_del(options, "path");
790cad2ccc3SStefano Garzarella
791fd66dbd4SStefan Hajnoczi return 0;
792fd66dbd4SStefan Hajnoczi }
793fd66dbd4SStefan Hajnoczi
blkio_open(BlockDriverState * bs,QDict * options,int flags,Error ** errp)794d656aaa1SPaolo Bonzini static int blkio_open(BlockDriverState *bs, QDict *options, int flags,
795fd66dbd4SStefan Hajnoczi Error **errp)
796fd66dbd4SStefan Hajnoczi {
797fd66dbd4SStefan Hajnoczi const char *blkio_driver = bs->drv->protocol_name;
798fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
799fd66dbd4SStefan Hajnoczi int ret;
800fd66dbd4SStefan Hajnoczi
801fd66dbd4SStefan Hajnoczi ret = blkio_create(blkio_driver, &s->blkio);
802fd66dbd4SStefan Hajnoczi if (ret < 0) {
803fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "blkio_create failed: %s",
804fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
805fd66dbd4SStefan Hajnoczi return ret;
806fd66dbd4SStefan Hajnoczi }
807fd66dbd4SStefan Hajnoczi
808fd66dbd4SStefan Hajnoczi if (!(flags & BDRV_O_RDWR)) {
809fd66dbd4SStefan Hajnoczi ret = blkio_set_bool(s->blkio, "read-only", true);
810fd66dbd4SStefan Hajnoczi if (ret < 0) {
811fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to set read-only: %s",
812fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
813fd66dbd4SStefan Hajnoczi blkio_destroy(&s->blkio);
814fd66dbd4SStefan Hajnoczi return ret;
815fd66dbd4SStefan Hajnoczi }
816fd66dbd4SStefan Hajnoczi }
817fd66dbd4SStefan Hajnoczi
81869785d66SStefano Garzarella if (strcmp(blkio_driver, "io_uring") == 0) {
81969785d66SStefano Garzarella ret = blkio_io_uring_connect(bs, options, flags, errp);
82069785d66SStefano Garzarella } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
82169785d66SStefano Garzarella ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
82269785d66SStefano Garzarella } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
82369785d66SStefano Garzarella ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82469785d66SStefano Garzarella } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
82569785d66SStefano Garzarella ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82669785d66SStefano Garzarella } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
82769785d66SStefano Garzarella ret = blkio_virtio_blk_connect(bs, options, flags, errp);
82869785d66SStefano Garzarella } else {
82969785d66SStefano Garzarella g_assert_not_reached();
83069785d66SStefano Garzarella }
831fd66dbd4SStefan Hajnoczi if (ret < 0) {
832fd66dbd4SStefan Hajnoczi blkio_destroy(&s->blkio);
833fd66dbd4SStefan Hajnoczi return ret;
834fd66dbd4SStefan Hajnoczi }
835fd66dbd4SStefan Hajnoczi
836fd66dbd4SStefan Hajnoczi ret = blkio_get_bool(s->blkio,
837fd66dbd4SStefan Hajnoczi "needs-mem-regions",
838fd66dbd4SStefan Hajnoczi &s->needs_mem_regions);
839fd66dbd4SStefan Hajnoczi if (ret < 0) {
840fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret,
841fd66dbd4SStefan Hajnoczi "failed to get needs-mem-regions: %s",
842fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
843fd66dbd4SStefan Hajnoczi blkio_destroy(&s->blkio);
844fd66dbd4SStefan Hajnoczi return ret;
845fd66dbd4SStefan Hajnoczi }
846fd66dbd4SStefan Hajnoczi
847c5640b3eSStefan Hajnoczi ret = blkio_get_bool(s->blkio,
848c5640b3eSStefan Hajnoczi "needs-mem-region-fd",
849c5640b3eSStefan Hajnoczi &s->needs_mem_region_fd);
850c5640b3eSStefan Hajnoczi if (ret < 0) {
851c5640b3eSStefan Hajnoczi error_setg_errno(errp, -ret,
852c5640b3eSStefan Hajnoczi "failed to get needs-mem-region-fd: %s",
853c5640b3eSStefan Hajnoczi blkio_get_error_msg());
854c5640b3eSStefan Hajnoczi blkio_destroy(&s->blkio);
855c5640b3eSStefan Hajnoczi return ret;
856c5640b3eSStefan Hajnoczi }
857c5640b3eSStefan Hajnoczi
858fd66dbd4SStefan Hajnoczi ret = blkio_get_uint64(s->blkio,
859fd66dbd4SStefan Hajnoczi "mem-region-alignment",
860fd66dbd4SStefan Hajnoczi &s->mem_region_alignment);
861fd66dbd4SStefan Hajnoczi if (ret < 0) {
862fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret,
863fd66dbd4SStefan Hajnoczi "failed to get mem-region-alignment: %s",
864fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
865fd66dbd4SStefan Hajnoczi blkio_destroy(&s->blkio);
866fd66dbd4SStefan Hajnoczi return ret;
867fd66dbd4SStefan Hajnoczi }
868fd66dbd4SStefan Hajnoczi
869c5640b3eSStefan Hajnoczi ret = blkio_get_bool(s->blkio,
870c5640b3eSStefan Hajnoczi "may-pin-mem-regions",
871c5640b3eSStefan Hajnoczi &s->may_pin_mem_regions);
872c5640b3eSStefan Hajnoczi if (ret < 0) {
873c5640b3eSStefan Hajnoczi /* Be conservative (assume pinning) if the property is not supported */
874c5640b3eSStefan Hajnoczi s->may_pin_mem_regions = s->needs_mem_regions;
875c5640b3eSStefan Hajnoczi }
876c5640b3eSStefan Hajnoczi
877c5640b3eSStefan Hajnoczi /*
878c5640b3eSStefan Hajnoczi * Notify if libblkio drivers pin memory and prevent features like
879c5640b3eSStefan Hajnoczi * virtio-mem from working.
880c5640b3eSStefan Hajnoczi */
881c5640b3eSStefan Hajnoczi if (s->may_pin_mem_regions) {
882c5640b3eSStefan Hajnoczi ret = ram_block_discard_disable(true);
883c5640b3eSStefan Hajnoczi if (ret < 0) {
884c5640b3eSStefan Hajnoczi error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
885c5640b3eSStefan Hajnoczi blkio_destroy(&s->blkio);
886c5640b3eSStefan Hajnoczi return ret;
887c5640b3eSStefan Hajnoczi }
888c5640b3eSStefan Hajnoczi }
889c5640b3eSStefan Hajnoczi
890fd66dbd4SStefan Hajnoczi ret = blkio_start(s->blkio);
891fd66dbd4SStefan Hajnoczi if (ret < 0) {
892fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "blkio_start failed: %s",
893fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
894fd66dbd4SStefan Hajnoczi blkio_destroy(&s->blkio);
895c5640b3eSStefan Hajnoczi if (s->may_pin_mem_regions) {
896c5640b3eSStefan Hajnoczi ram_block_discard_disable(false);
897c5640b3eSStefan Hajnoczi }
898fd66dbd4SStefan Hajnoczi return ret;
899fd66dbd4SStefan Hajnoczi }
900fd66dbd4SStefan Hajnoczi
901c5640b3eSStefan Hajnoczi bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
902*547c4e50SStefano Garzarella bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
903*547c4e50SStefano Garzarella #ifdef CONFIG_BLKIO_WRITE_ZEROS_FUA
904*547c4e50SStefano Garzarella bs->supported_zero_flags |= BDRV_REQ_FUA;
905*547c4e50SStefano Garzarella #endif
906fd66dbd4SStefan Hajnoczi
907fd66dbd4SStefan Hajnoczi qemu_mutex_init(&s->blkio_lock);
908fd66dbd4SStefan Hajnoczi qemu_co_mutex_init(&s->bounce_lock);
909fd66dbd4SStefan Hajnoczi qemu_co_queue_init(&s->bounce_available);
910fd66dbd4SStefan Hajnoczi QLIST_INIT(&s->bounce_bufs);
911fd66dbd4SStefan Hajnoczi s->blkioq = blkio_get_queue(s->blkio, 0);
912fd66dbd4SStefan Hajnoczi s->completion_fd = blkioq_get_completion_fd(s->blkioq);
9139359c459SStefano Garzarella blkioq_set_completion_fd_enabled(s->blkioq, true);
914fd66dbd4SStefan Hajnoczi
915fd66dbd4SStefan Hajnoczi blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
916fd66dbd4SStefan Hajnoczi return 0;
917fd66dbd4SStefan Hajnoczi }
918fd66dbd4SStefan Hajnoczi
/*
 * Release everything blkio_open() set up, in the reverse order of setup:
 * detach from the AioContext, destroy the libblkio instance, re-enable RAM
 * block discard if it was disabled, and only then destroy the lock.
 */
static void blkio_close(BlockDriverState *bs)
{
    BDRVBlkioState *s = bs->opaque;

    /* There is no destroy() API for s->bounce_lock */

    blkio_detach_aio_context(bs);
    blkio_destroy(&s->blkio);

    if (s->may_pin_mem_regions) {
        ram_block_discard_disable(false);
    }

    /*
     * blkio_lock protects ->blkio and ->blkioq, so keep it valid until both
     * are gone and destroy it last.
     */
    qemu_mutex_destroy(&s->blkio_lock);
}
933fd66dbd4SStefan Hajnoczi
blkio_co_getlength(BlockDriverState * bs)934c86422c5SEmanuele Giuseppe Esposito static int64_t coroutine_fn blkio_co_getlength(BlockDriverState *bs)
935fd66dbd4SStefan Hajnoczi {
936fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
937fd66dbd4SStefan Hajnoczi uint64_t capacity;
938fd66dbd4SStefan Hajnoczi int ret;
939fd66dbd4SStefan Hajnoczi
940fd66dbd4SStefan Hajnoczi WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
941fd66dbd4SStefan Hajnoczi ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
942fd66dbd4SStefan Hajnoczi }
943fd66dbd4SStefan Hajnoczi if (ret < 0) {
944fd66dbd4SStefan Hajnoczi return -ret;
945fd66dbd4SStefan Hajnoczi }
946fd66dbd4SStefan Hajnoczi
947fd66dbd4SStefan Hajnoczi return capacity;
948fd66dbd4SStefan Hajnoczi }
949fd66dbd4SStefan Hajnoczi
blkio_truncate(BlockDriverState * bs,int64_t offset,bool exact,PreallocMode prealloc,BdrvRequestFlags flags,Error ** errp)9504c8f4fdaSAlberto Faria static int coroutine_fn blkio_truncate(BlockDriverState *bs, int64_t offset,
9514c8f4fdaSAlberto Faria bool exact, PreallocMode prealloc,
9524c8f4fdaSAlberto Faria BdrvRequestFlags flags, Error **errp)
9534c8f4fdaSAlberto Faria {
9544c8f4fdaSAlberto Faria int64_t current_length;
9554c8f4fdaSAlberto Faria
9564c8f4fdaSAlberto Faria if (prealloc != PREALLOC_MODE_OFF) {
9574c8f4fdaSAlberto Faria error_setg(errp, "Unsupported preallocation mode '%s'",
9584c8f4fdaSAlberto Faria PreallocMode_str(prealloc));
9594c8f4fdaSAlberto Faria return -ENOTSUP;
9604c8f4fdaSAlberto Faria }
9614c8f4fdaSAlberto Faria
962c86422c5SEmanuele Giuseppe Esposito current_length = blkio_co_getlength(bs);
9634c8f4fdaSAlberto Faria
9644c8f4fdaSAlberto Faria if (offset > current_length) {
9654c8f4fdaSAlberto Faria error_setg(errp, "Cannot grow device");
9664c8f4fdaSAlberto Faria return -EINVAL;
9674c8f4fdaSAlberto Faria } else if (exact && offset != current_length) {
9684c8f4fdaSAlberto Faria error_setg(errp, "Cannot resize device");
9694c8f4fdaSAlberto Faria return -ENOTSUP;
9704c8f4fdaSAlberto Faria }
9714c8f4fdaSAlberto Faria
9724c8f4fdaSAlberto Faria return 0;
9734c8f4fdaSAlberto Faria }
9744c8f4fdaSAlberto Faria
9753d47eb0aSEmanuele Giuseppe Esposito static int coroutine_fn
blkio_co_get_info(BlockDriverState * bs,BlockDriverInfo * bdi)9763d47eb0aSEmanuele Giuseppe Esposito blkio_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
977fd66dbd4SStefan Hajnoczi {
978fd66dbd4SStefan Hajnoczi return 0;
979fd66dbd4SStefan Hajnoczi }
980fd66dbd4SStefan Hajnoczi
blkio_refresh_limits(BlockDriverState * bs,Error ** errp)981fd66dbd4SStefan Hajnoczi static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
982fd66dbd4SStefan Hajnoczi {
983fd66dbd4SStefan Hajnoczi BDRVBlkioState *s = bs->opaque;
984fd66dbd4SStefan Hajnoczi QEMU_LOCK_GUARD(&s->blkio_lock);
985fd66dbd4SStefan Hajnoczi int value;
986fd66dbd4SStefan Hajnoczi int ret;
987fd66dbd4SStefan Hajnoczi
988fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "request-alignment", &value);
989fd66dbd4SStefan Hajnoczi if (ret < 0) {
990fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
991fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
992fd66dbd4SStefan Hajnoczi return;
993fd66dbd4SStefan Hajnoczi }
994fd66dbd4SStefan Hajnoczi bs->bl.request_alignment = value;
995fd66dbd4SStefan Hajnoczi if (bs->bl.request_alignment < 1 ||
996fd66dbd4SStefan Hajnoczi bs->bl.request_alignment >= INT_MAX ||
997fd66dbd4SStefan Hajnoczi !is_power_of_2(bs->bl.request_alignment)) {
998fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
999fd66dbd4SStefan Hajnoczi "must be a power of 2 less than INT_MAX",
1000fd66dbd4SStefan Hajnoczi bs->bl.request_alignment);
1001fd66dbd4SStefan Hajnoczi return;
1002fd66dbd4SStefan Hajnoczi }
1003fd66dbd4SStefan Hajnoczi
1004fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
1005fd66dbd4SStefan Hajnoczi if (ret < 0) {
1006fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
1007fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
1008fd66dbd4SStefan Hajnoczi return;
1009fd66dbd4SStefan Hajnoczi }
1010fd66dbd4SStefan Hajnoczi bs->bl.opt_transfer = value;
1011fd66dbd4SStefan Hajnoczi if (bs->bl.opt_transfer > INT_MAX ||
1012fd66dbd4SStefan Hajnoczi (bs->bl.opt_transfer % bs->bl.request_alignment)) {
1013fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
1014fd66dbd4SStefan Hajnoczi "be a multiple of %" PRIu32, bs->bl.opt_transfer,
1015fd66dbd4SStefan Hajnoczi bs->bl.request_alignment);
1016fd66dbd4SStefan Hajnoczi return;
1017fd66dbd4SStefan Hajnoczi }
1018fd66dbd4SStefan Hajnoczi
1019fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "max-transfer", &value);
1020fd66dbd4SStefan Hajnoczi if (ret < 0) {
1021fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
1022fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
1023fd66dbd4SStefan Hajnoczi return;
1024fd66dbd4SStefan Hajnoczi }
1025fd66dbd4SStefan Hajnoczi bs->bl.max_transfer = value;
1026fd66dbd4SStefan Hajnoczi if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
1027fd66dbd4SStefan Hajnoczi (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
1028fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
1029fd66dbd4SStefan Hajnoczi "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
1030fd66dbd4SStefan Hajnoczi bs->bl.max_transfer, bs->bl.request_alignment,
1031fd66dbd4SStefan Hajnoczi bs->bl.opt_transfer);
1032fd66dbd4SStefan Hajnoczi return;
1033fd66dbd4SStefan Hajnoczi }
1034fd66dbd4SStefan Hajnoczi
1035fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "buf-alignment", &value);
1036fd66dbd4SStefan Hajnoczi if (ret < 0) {
1037fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
1038fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
1039fd66dbd4SStefan Hajnoczi return;
1040fd66dbd4SStefan Hajnoczi }
1041fd66dbd4SStefan Hajnoczi if (value < 1) {
1042fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
1043fd66dbd4SStefan Hajnoczi "positive", value);
1044fd66dbd4SStefan Hajnoczi return;
1045fd66dbd4SStefan Hajnoczi }
1046fd66dbd4SStefan Hajnoczi bs->bl.min_mem_alignment = value;
1047fd66dbd4SStefan Hajnoczi
1048fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
1049fd66dbd4SStefan Hajnoczi if (ret < 0) {
1050fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret,
1051fd66dbd4SStefan Hajnoczi "failed to get \"optimal-buf-alignment\": %s",
1052fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
1053fd66dbd4SStefan Hajnoczi return;
1054fd66dbd4SStefan Hajnoczi }
1055fd66dbd4SStefan Hajnoczi if (value < 1) {
1056fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
1057fd66dbd4SStefan Hajnoczi "must be positive", value);
1058fd66dbd4SStefan Hajnoczi return;
1059fd66dbd4SStefan Hajnoczi }
1060fd66dbd4SStefan Hajnoczi bs->bl.opt_mem_alignment = value;
1061fd66dbd4SStefan Hajnoczi
1062fd66dbd4SStefan Hajnoczi ret = blkio_get_int(s->blkio, "max-segments", &value);
1063fd66dbd4SStefan Hajnoczi if (ret < 0) {
1064fd66dbd4SStefan Hajnoczi error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
1065fd66dbd4SStefan Hajnoczi blkio_get_error_msg());
1066fd66dbd4SStefan Hajnoczi return;
1067fd66dbd4SStefan Hajnoczi }
1068fd66dbd4SStefan Hajnoczi if (value < 1) {
1069fd66dbd4SStefan Hajnoczi error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
1070fd66dbd4SStefan Hajnoczi value);
1071fd66dbd4SStefan Hajnoczi return;
1072fd66dbd4SStefan Hajnoczi }
1073fd66dbd4SStefan Hajnoczi bs->bl.max_iov = value;
1074fd66dbd4SStefan Hajnoczi }
1075fd66dbd4SStefan Hajnoczi
/*
 * TODO
 * Missing libblkio APIs:
 * - block_status
 * - co_invalidate_cache
 *
 * Out of scope?
 * - create
 * - truncate
 */
1086fd66dbd4SStefan Hajnoczi
/*
 * BlockDriver fields shared by every driver defined in this file.
 *
 * .format_name and .protocol_name are deliberately not part of the macro
 * because module_block.py does not parse macros in the source code.
 */
#define BLKIO_DRIVER_COMMON \
    .instance_size           = sizeof(BDRVBlkioState), \
    .bdrv_open               = blkio_open, \
    .bdrv_close              = blkio_close, \
    .bdrv_co_getlength       = blkio_co_getlength, \
    .bdrv_co_truncate        = blkio_truncate, \
    .bdrv_co_get_info        = blkio_co_get_info, \
    .bdrv_attach_aio_context = blkio_attach_aio_context, \
    .bdrv_detach_aio_context = blkio_detach_aio_context, \
    .bdrv_co_pdiscard        = blkio_co_pdiscard, \
    .bdrv_co_preadv          = blkio_co_preadv, \
    .bdrv_co_pwritev         = blkio_co_pwritev, \
    .bdrv_co_flush_to_disk   = blkio_co_flush, \
    .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
    .bdrv_refresh_limits     = blkio_refresh_limits, \
    .bdrv_register_buf       = blkio_register_buf, \
    .bdrv_unregister_buf     = blkio_unregister_buf,
1108fd66dbd4SStefan Hajnoczi
/*
 * Use the same .format_name and .protocol_name as the libblkio driver name for
 * consistency.
 */
1113c21eae1cSStefan Hajnoczi
1114c21eae1cSStefan Hajnoczi static BlockDriver bdrv_io_uring = {
1115c21eae1cSStefan Hajnoczi .format_name = "io_uring",
1116c21eae1cSStefan Hajnoczi .protocol_name = "io_uring",
1117fd66dbd4SStefan Hajnoczi .bdrv_needs_filename = true,
1118c21eae1cSStefan Hajnoczi BLKIO_DRIVER_COMMON
1119c21eae1cSStefan Hajnoczi };
1120fd66dbd4SStefan Hajnoczi
1121c21eae1cSStefan Hajnoczi static BlockDriver bdrv_nvme_io_uring = {
1122c21eae1cSStefan Hajnoczi .format_name = "nvme-io_uring",
1123c21eae1cSStefan Hajnoczi .protocol_name = "nvme-io_uring",
1124c21eae1cSStefan Hajnoczi BLKIO_DRIVER_COMMON
1125c21eae1cSStefan Hajnoczi };
1126fd66dbd4SStefan Hajnoczi
1127c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vfio_pci = {
1128c21eae1cSStefan Hajnoczi .format_name = "virtio-blk-vfio-pci",
1129c21eae1cSStefan Hajnoczi .protocol_name = "virtio-blk-vfio-pci",
1130c21eae1cSStefan Hajnoczi BLKIO_DRIVER_COMMON
1131c21eae1cSStefan Hajnoczi };
113203d9e4c0SAlberto Faria
1133c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vhost_user = {
1134c21eae1cSStefan Hajnoczi .format_name = "virtio-blk-vhost-user",
1135c21eae1cSStefan Hajnoczi .protocol_name = "virtio-blk-vhost-user",
1136c21eae1cSStefan Hajnoczi BLKIO_DRIVER_COMMON
1137c21eae1cSStefan Hajnoczi };
1138fd66dbd4SStefan Hajnoczi
1139c21eae1cSStefan Hajnoczi static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
1140c21eae1cSStefan Hajnoczi .format_name = "virtio-blk-vhost-vdpa",
1141c21eae1cSStefan Hajnoczi .protocol_name = "virtio-blk-vhost-vdpa",
1142c21eae1cSStefan Hajnoczi BLKIO_DRIVER_COMMON
1143c21eae1cSStefan Hajnoczi };
1144fd66dbd4SStefan Hajnoczi
bdrv_blkio_init(void)1145fd66dbd4SStefan Hajnoczi static void bdrv_blkio_init(void)
1146fd66dbd4SStefan Hajnoczi {
1147fd66dbd4SStefan Hajnoczi bdrv_register(&bdrv_io_uring);
1148fd66dbd4SStefan Hajnoczi bdrv_register(&bdrv_nvme_io_uring);
114903d9e4c0SAlberto Faria bdrv_register(&bdrv_virtio_blk_vfio_pci);
1150fd66dbd4SStefan Hajnoczi bdrv_register(&bdrv_virtio_blk_vhost_user);
1151fd66dbd4SStefan Hajnoczi bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
1152fd66dbd4SStefan Hajnoczi }
1153fd66dbd4SStefan Hajnoczi
1154fd66dbd4SStefan Hajnoczi block_init(bdrv_blkio_init);
1155