xref: /openbmc/qemu/block/export/vduse-blk.c (revision 2a8af382)
1 /*
2  * Export QEMU block device via VDUSE
3  *
4  * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
5  *
6  * Author:
7  *   Xie Yongji <xieyongji@bytedance.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 #include <sys/eventfd.h>
14 
15 #include "qemu/osdep.h"
16 #include "qapi/error.h"
17 #include "block/export.h"
18 #include "qemu/error-report.h"
19 #include "util/block-helpers.h"
20 #include "subprojects/libvduse/libvduse.h"
21 #include "virtio-blk-handler.h"
22 
23 #include "standard-headers/linux/virtio_blk.h"
24 
/* Number of virtqueues used when the export options give no num-queues */
#define VDUSE_DEFAULT_NUM_QUEUE 1
/* Virtqueue size used when the export options give no queue-size */
#define VDUSE_DEFAULT_QUEUE_SIZE 256
27 
28 typedef struct VduseBlkExport {
29     BlockExport export;
30     VirtioBlkHandler handler;
31     VduseDev *dev;
32     uint16_t num_queues;
33     char *recon_file;
34     unsigned int inflight;
35 } VduseBlkExport;
36 
37 typedef struct VduseBlkReq {
38     VduseVirtqElement elem;
39     VduseVirtq *vq;
40 } VduseBlkReq;
41 
42 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
43 {
44     vblk_exp->inflight++;
45 }
46 
47 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
48 {
49     if (--vblk_exp->inflight == 0) {
50         aio_wait_kick();
51     }
52 }
53 
54 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
55 {
56     vduse_queue_push(req->vq, &req->elem, in_len);
57     vduse_queue_notify(req->vq);
58 
59     free(req);
60 }
61 
62 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
63 {
64     VduseBlkReq *req = opaque;
65     VduseVirtq *vq = req->vq;
66     VduseDev *dev = vduse_queue_get_dev(vq);
67     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
68     VirtioBlkHandler *handler = &vblk_exp->handler;
69     VduseVirtqElement *elem = &req->elem;
70     struct iovec *in_iov = elem->in_sg;
71     struct iovec *out_iov = elem->out_sg;
72     unsigned in_num = elem->in_num;
73     unsigned out_num = elem->out_num;
74     int in_len;
75 
76     in_len = virtio_blk_process_req(handler, in_iov,
77                                     out_iov, in_num, out_num);
78     if (in_len < 0) {
79         free(req);
80         return;
81     }
82 
83     vduse_blk_req_complete(req, in_len);
84     vduse_blk_inflight_dec(vblk_exp);
85 }
86 
87 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
88 {
89     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
90 
91     while (1) {
92         VduseBlkReq *req;
93 
94         req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
95         if (!req) {
96             break;
97         }
98         req->vq = vq;
99 
100         Coroutine *co =
101             qemu_coroutine_create(vduse_blk_virtio_process_req, req);
102 
103         vduse_blk_inflight_inc(vblk_exp);
104         qemu_coroutine_enter(co);
105     }
106 }
107 
108 static void on_vduse_vq_kick(void *opaque)
109 {
110     VduseVirtq *vq = opaque;
111     VduseDev *dev = vduse_queue_get_dev(vq);
112     int fd = vduse_queue_get_fd(vq);
113     eventfd_t kick_data;
114 
115     if (eventfd_read(fd, &kick_data) == -1) {
116         error_report("failed to read data from eventfd");
117         return;
118     }
119 
120     vduse_blk_vq_handler(dev, vq);
121 }
122 
123 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
124 {
125     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
126 
127     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
128                        true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
129     /* Make sure we don't miss any kick afer reconnecting */
130     eventfd_write(vduse_queue_get_fd(vq), 1);
131 }
132 
133 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
134 {
135     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
136 
137     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
138                        true, NULL, NULL, NULL, NULL, NULL);
139 }
140 
141 static const VduseOps vduse_blk_ops = {
142     .enable_queue = vduse_blk_enable_queue,
143     .disable_queue = vduse_blk_disable_queue,
144 };
145 
146 static void on_vduse_dev_kick(void *opaque)
147 {
148     VduseDev *dev = opaque;
149 
150     vduse_dev_handler(dev);
151 }
152 
153 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
154 {
155     int i;
156 
157     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
158                        true, on_vduse_dev_kick, NULL, NULL, NULL,
159                        vblk_exp->dev);
160 
161     for (i = 0; i < vblk_exp->num_queues; i++) {
162         VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
163         int fd = vduse_queue_get_fd(vq);
164 
165         if (fd < 0) {
166             continue;
167         }
168         aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
169                            on_vduse_vq_kick, NULL, NULL, NULL, vq);
170     }
171 }
172 
173 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
174 {
175     int i;
176 
177     for (i = 0; i < vblk_exp->num_queues; i++) {
178         VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
179         int fd = vduse_queue_get_fd(vq);
180 
181         if (fd < 0) {
182             continue;
183         }
184         aio_set_fd_handler(vblk_exp->export.ctx, fd,
185                            true, NULL, NULL, NULL, NULL, NULL);
186     }
187     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
188                        true, NULL, NULL, NULL, NULL, NULL);
189 
190     AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
191 }
192 
193 
194 static void blk_aio_attached(AioContext *ctx, void *opaque)
195 {
196     VduseBlkExport *vblk_exp = opaque;
197 
198     vblk_exp->export.ctx = ctx;
199     vduse_blk_attach_ctx(vblk_exp, ctx);
200 }
201 
202 static void blk_aio_detach(void *opaque)
203 {
204     VduseBlkExport *vblk_exp = opaque;
205 
206     vduse_blk_detach_ctx(vblk_exp);
207     vblk_exp->export.ctx = NULL;
208 }
209 
210 static void vduse_blk_resize(void *opaque)
211 {
212     BlockExport *exp = opaque;
213     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
214     struct virtio_blk_config config;
215 
216     config.capacity =
217             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
218     vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
219                             offsetof(struct virtio_blk_config, capacity),
220                             (char *)&config.capacity);
221 }
222 
223 static const BlockDevOps vduse_block_ops = {
224     .resize_cb = vduse_blk_resize,
225 };
226 
227 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
228                                 Error **errp)
229 {
230     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
231     BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
232     uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
233     uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
234     uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
235     Error *local_err = NULL;
236     struct virtio_blk_config config = { 0 };
237     uint64_t features;
238     int i, ret;
239 
240     if (vblk_opts->has_num_queues) {
241         num_queues = vblk_opts->num_queues;
242         if (num_queues == 0) {
243             error_setg(errp, "num-queues must be greater than 0");
244             return -EINVAL;
245         }
246     }
247 
248     if (vblk_opts->has_queue_size) {
249         queue_size = vblk_opts->queue_size;
250         if (queue_size <= 2 || !is_power_of_2(queue_size) ||
251             queue_size > VIRTQUEUE_MAX_SIZE) {
252             error_setg(errp, "queue-size is invalid");
253             return -EINVAL;
254         }
255     }
256 
257     if (vblk_opts->has_logical_block_size) {
258         logical_block_size = vblk_opts->logical_block_size;
259         check_block_size(exp->id, "logical-block-size", logical_block_size,
260                          &local_err);
261         if (local_err) {
262             error_propagate(errp, local_err);
263             return -EINVAL;
264         }
265     }
266     vblk_exp->num_queues = num_queues;
267     vblk_exp->handler.blk = exp->blk;
268     vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
269     vblk_exp->handler.logical_block_size = logical_block_size;
270     vblk_exp->handler.writable = opts->writable;
271 
272     config.capacity =
273             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
274     config.seg_max = cpu_to_le32(queue_size - 2);
275     config.min_io_size = cpu_to_le16(1);
276     config.opt_io_size = cpu_to_le32(1);
277     config.num_queues = cpu_to_le16(num_queues);
278     config.blk_size = cpu_to_le32(logical_block_size);
279     config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
280     config.max_discard_seg = cpu_to_le32(1);
281     config.discard_sector_alignment =
282         cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
283     config.max_write_zeroes_sectors =
284         cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
285     config.max_write_zeroes_seg = cpu_to_le32(1);
286 
287     features = vduse_get_virtio_features() |
288                (1ULL << VIRTIO_BLK_F_SEG_MAX) |
289                (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
290                (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
291                (1ULL << VIRTIO_BLK_F_FLUSH) |
292                (1ULL << VIRTIO_BLK_F_DISCARD) |
293                (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
294 
295     if (num_queues > 1) {
296         features |= 1ULL << VIRTIO_BLK_F_MQ;
297     }
298     if (!opts->writable) {
299         features |= 1ULL << VIRTIO_BLK_F_RO;
300     }
301 
302     vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
303                                      features, num_queues,
304                                      sizeof(struct virtio_blk_config),
305                                      (char *)&config, &vduse_blk_ops,
306                                      vblk_exp);
307     if (!vblk_exp->dev) {
308         error_setg(errp, "failed to create vduse device");
309         ret = -ENOMEM;
310         goto err_dev;
311     }
312 
313     vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
314                                            g_get_tmp_dir(), vblk_opts->name);
315     if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
316         error_setg(errp, "failed to set reconnect log file");
317         ret = -EINVAL;
318         goto err;
319     }
320 
321     for (i = 0; i < num_queues; i++) {
322         vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
323     }
324 
325     aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
326                        on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
327 
328     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
329                                  vblk_exp);
330 
331     blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
332 
333     return 0;
334 err:
335     vduse_dev_destroy(vblk_exp->dev);
336     g_free(vblk_exp->recon_file);
337 err_dev:
338     g_free(vblk_exp->handler.serial);
339     return ret;
340 }
341 
342 static void vduse_blk_exp_delete(BlockExport *exp)
343 {
344     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
345     int ret;
346 
347     blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
348                                     vblk_exp);
349     blk_set_dev_ops(exp->blk, NULL, NULL);
350     ret = vduse_dev_destroy(vblk_exp->dev);
351     if (ret != -EBUSY) {
352         unlink(vblk_exp->recon_file);
353     }
354     g_free(vblk_exp->recon_file);
355     g_free(vblk_exp->handler.serial);
356 }
357 
358 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
359 {
360     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
361 
362     aio_context_acquire(vblk_exp->export.ctx);
363     vduse_blk_detach_ctx(vblk_exp);
364     aio_context_acquire(vblk_exp->export.ctx);
365 }
366 
367 const BlockExportDriver blk_exp_vduse_blk = {
368     .type               = BLOCK_EXPORT_TYPE_VDUSE_BLK,
369     .instance_size      = sizeof(VduseBlkExport),
370     .create             = vduse_blk_exp_create,
371     .delete             = vduse_blk_exp_delete,
372     .request_shutdown   = vduse_blk_exp_request_shutdown,
373 };
374