xref: /openbmc/qemu/hw/block/vhost-user-blk.c (revision 3f7febc9)
1 /*
2  * vhost-user-blk host device
3  *
4  * Copyright(C) 2017 Intel Corporation.
5  *
6  * Authors:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
10  * Felipe Franciosi <felipe@nutanix.com>
11  * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
12  * Nicholas Bellinger <nab@risingtidesystems.com>
13  *
14  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
15  * See the COPYING.LIB file in the top-level directory.
16  *
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #include "qemu/error-report.h"
22 #include "qemu/cutils.h"
23 #include "hw/qdev-core.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/qdev-properties-system.h"
26 #include "hw/virtio/virtio-blk-common.h"
27 #include "hw/virtio/vhost.h"
28 #include "hw/virtio/vhost-user-blk.h"
29 #include "hw/virtio/virtio.h"
30 #include "hw/virtio/virtio-bus.h"
31 #include "hw/virtio/virtio-access.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/runstate.h"
34 
35 #define REALIZE_CONNECTION_RETRIES 3
36 
37 static const int user_feature_bits[] = {
38     VIRTIO_BLK_F_SIZE_MAX,
39     VIRTIO_BLK_F_SEG_MAX,
40     VIRTIO_BLK_F_GEOMETRY,
41     VIRTIO_BLK_F_BLK_SIZE,
42     VIRTIO_BLK_F_TOPOLOGY,
43     VIRTIO_BLK_F_MQ,
44     VIRTIO_BLK_F_RO,
45     VIRTIO_BLK_F_FLUSH,
46     VIRTIO_BLK_F_CONFIG_WCE,
47     VIRTIO_BLK_F_DISCARD,
48     VIRTIO_BLK_F_WRITE_ZEROES,
49     VIRTIO_F_VERSION_1,
50     VIRTIO_RING_F_INDIRECT_DESC,
51     VIRTIO_RING_F_EVENT_IDX,
52     VIRTIO_F_NOTIFY_ON_EMPTY,
53     VIRTIO_F_RING_PACKED,
54     VIRTIO_F_IOMMU_PLATFORM,
55     VHOST_INVALID_FEATURE_BIT
56 };
57 
58 static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
59 
60 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
61 {
62     VHostUserBlk *s = VHOST_USER_BLK(vdev);
63 
64     /* Our num_queues overrides the device backend */
65     virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues);
66 
67     memcpy(config, &s->blkcfg, vdev->config_len);
68 }
69 
70 static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
71 {
72     VHostUserBlk *s = VHOST_USER_BLK(vdev);
73     struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
74     int ret;
75 
76     if (blkcfg->wce == s->blkcfg.wce) {
77         return;
78     }
79 
80     ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
81                                offsetof(struct virtio_blk_config, wce),
82                                sizeof(blkcfg->wce),
83                                VHOST_SET_CONFIG_TYPE_MASTER);
84     if (ret) {
85         error_report("set device config space failed");
86         return;
87     }
88 
89     s->blkcfg.wce = blkcfg->wce;
90 }
91 
92 static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
93 {
94     int ret;
95     struct virtio_blk_config blkcfg;
96     VirtIODevice *vdev = dev->vdev;
97     VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
98     Error *local_err = NULL;
99 
100     ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg,
101                                vdev->config_len, &local_err);
102     if (ret < 0) {
103         error_report_err(local_err);
104         return ret;
105     }
106 
107     /* valid for resize only */
108     if (blkcfg.capacity != s->blkcfg.capacity) {
109         s->blkcfg.capacity = blkcfg.capacity;
110         memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
111         virtio_notify_config(dev->vdev);
112     }
113 
114     return 0;
115 }
116 
117 const VhostDevConfigOps blk_ops = {
118     .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
119 };
120 
121 static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp)
122 {
123     VHostUserBlk *s = VHOST_USER_BLK(vdev);
124     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
125     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
126     int i, ret;
127 
128     if (!k->set_guest_notifiers) {
129         error_setg(errp, "binding does not support guest notifiers");
130         return -ENOSYS;
131     }
132 
133     ret = vhost_dev_enable_notifiers(&s->dev, vdev);
134     if (ret < 0) {
135         error_setg_errno(errp, -ret, "Error enabling host notifiers");
136         return ret;
137     }
138 
139     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
140     if (ret < 0) {
141         error_setg_errno(errp, -ret, "Error binding guest notifier");
142         goto err_host_notifiers;
143     }
144 
145     s->dev.acked_features = vdev->guest_features;
146 
147     ret = vhost_dev_prepare_inflight(&s->dev, vdev);
148     if (ret < 0) {
149         error_setg_errno(errp, -ret, "Error setting inflight format");
150         goto err_guest_notifiers;
151     }
152 
153     if (!s->inflight->addr) {
154         ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight);
155         if (ret < 0) {
156             error_setg_errno(errp, -ret, "Error getting inflight");
157             goto err_guest_notifiers;
158         }
159     }
160 
161     ret = vhost_dev_set_inflight(&s->dev, s->inflight);
162     if (ret < 0) {
163         error_setg_errno(errp, -ret, "Error setting inflight");
164         goto err_guest_notifiers;
165     }
166 
167     ret = vhost_dev_start(&s->dev, vdev);
168     if (ret < 0) {
169         error_setg_errno(errp, -ret, "Error starting vhost");
170         goto err_guest_notifiers;
171     }
172     s->started_vu = true;
173 
174     /* guest_notifier_mask/pending not used yet, so just unmask
175      * everything here. virtio-pci will do the right thing by
176      * enabling/disabling irqfd.
177      */
178     for (i = 0; i < s->dev.nvqs; i++) {
179         vhost_virtqueue_mask(&s->dev, vdev, i, false);
180     }
181 
182     return ret;
183 
184 err_guest_notifiers:
185     k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
186 err_host_notifiers:
187     vhost_dev_disable_notifiers(&s->dev, vdev);
188     return ret;
189 }
190 
191 static void vhost_user_blk_stop(VirtIODevice *vdev)
192 {
193     VHostUserBlk *s = VHOST_USER_BLK(vdev);
194     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
195     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
196     int ret;
197 
198     if (!s->started_vu) {
199         return;
200     }
201     s->started_vu = false;
202 
203     if (!k->set_guest_notifiers) {
204         return;
205     }
206 
207     vhost_dev_stop(&s->dev, vdev);
208 
209     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
210     if (ret < 0) {
211         error_report("vhost guest notifier cleanup failed: %d", ret);
212         return;
213     }
214 
215     vhost_dev_disable_notifiers(&s->dev, vdev);
216 }
217 
218 static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
219 {
220     VHostUserBlk *s = VHOST_USER_BLK(vdev);
221     bool should_start = virtio_device_started(vdev, status);
222     Error *local_err = NULL;
223     int ret;
224 
225     if (!vdev->vm_running) {
226         should_start = false;
227     }
228 
229     if (!s->connected) {
230         return;
231     }
232 
233     if (vhost_dev_is_started(&s->dev) == should_start) {
234         return;
235     }
236 
237     if (should_start) {
238         ret = vhost_user_blk_start(vdev, &local_err);
239         if (ret < 0) {
240             error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
241             qemu_chr_fe_disconnect(&s->chardev);
242         }
243     } else {
244         vhost_user_blk_stop(vdev);
245     }
246 
247 }
248 
249 static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
250                                             uint64_t features,
251                                             Error **errp)
252 {
253     VHostUserBlk *s = VHOST_USER_BLK(vdev);
254 
255     /* Turn on pre-defined features */
256     virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX);
257     virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
258     virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
259     virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
260     virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
261     virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
262     virtio_add_feature(&features, VIRTIO_BLK_F_RO);
263 
264     if (s->num_queues > 1) {
265         virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
266     }
267 
268     return vhost_get_features(&s->dev, user_feature_bits, features);
269 }
270 
271 static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
272 {
273     VHostUserBlk *s = VHOST_USER_BLK(vdev);
274     Error *local_err = NULL;
275     int i, ret;
276 
277     if (!vdev->start_on_kick) {
278         return;
279     }
280 
281     if (!s->connected) {
282         return;
283     }
284 
285     if (vhost_dev_is_started(&s->dev)) {
286         return;
287     }
288 
289     /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
290      * vhost here instead of waiting for .set_status().
291      */
292     ret = vhost_user_blk_start(vdev, &local_err);
293     if (ret < 0) {
294         error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
295         qemu_chr_fe_disconnect(&s->chardev);
296         return;
297     }
298 
299     /* Kick right away to begin processing requests already in vring */
300     for (i = 0; i < s->dev.nvqs; i++) {
301         VirtQueue *kick_vq = virtio_get_queue(vdev, i);
302 
303         if (!virtio_queue_get_desc_addr(vdev, i)) {
304             continue;
305         }
306         event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
307     }
308 }
309 
310 static void vhost_user_blk_reset(VirtIODevice *vdev)
311 {
312     VHostUserBlk *s = VHOST_USER_BLK(vdev);
313 
314     vhost_dev_free_inflight(s->inflight);
315 }
316 
317 static int vhost_user_blk_connect(DeviceState *dev, Error **errp)
318 {
319     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
320     VHostUserBlk *s = VHOST_USER_BLK(vdev);
321     int ret = 0;
322 
323     if (s->connected) {
324         return 0;
325     }
326     s->connected = true;
327 
328     s->dev.num_queues = s->num_queues;
329     s->dev.nvqs = s->num_queues;
330     s->dev.vqs = s->vhost_vqs;
331     s->dev.vq_index = 0;
332     s->dev.backend_features = 0;
333 
334     vhost_dev_set_config_notifier(&s->dev, &blk_ops);
335 
336     s->vhost_user.supports_config = true;
337     ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
338                          errp);
339     if (ret < 0) {
340         return ret;
341     }
342 
343     /* restore vhost state */
344     if (virtio_device_started(vdev, vdev->status)) {
345         ret = vhost_user_blk_start(vdev, errp);
346         if (ret < 0) {
347             return ret;
348         }
349     }
350 
351     return 0;
352 }
353 
354 static void vhost_user_blk_disconnect(DeviceState *dev)
355 {
356     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
357     VHostUserBlk *s = VHOST_USER_BLK(vdev);
358 
359     if (!s->connected) {
360         return;
361     }
362     s->connected = false;
363 
364     vhost_user_blk_stop(vdev);
365 
366     vhost_dev_cleanup(&s->dev);
367 }
368 
369 static void vhost_user_blk_chr_closed_bh(void *opaque)
370 {
371     DeviceState *dev = opaque;
372     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
373     VHostUserBlk *s = VHOST_USER_BLK(vdev);
374 
375     vhost_user_blk_disconnect(dev);
376     qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
377                              NULL, opaque, NULL, true);
378 }
379 
380 static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
381 {
382     DeviceState *dev = opaque;
383     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
384     VHostUserBlk *s = VHOST_USER_BLK(vdev);
385     Error *local_err = NULL;
386 
387     switch (event) {
388     case CHR_EVENT_OPENED:
389         if (vhost_user_blk_connect(dev, &local_err) < 0) {
390             error_report_err(local_err);
391             qemu_chr_fe_disconnect(&s->chardev);
392             return;
393         }
394         break;
395     case CHR_EVENT_CLOSED:
396         if (!runstate_check(RUN_STATE_SHUTDOWN)) {
397             /*
398              * A close event may happen during a read/write, but vhost
399              * code assumes the vhost_dev remains setup, so delay the
400              * stop & clear.
401              */
402             AioContext *ctx = qemu_get_current_aio_context();
403 
404             qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL,
405                     NULL, NULL, false);
406             aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque);
407 
408             /*
409              * Move vhost device to the stopped state. The vhost-user device
410              * will be clean up and disconnected in BH. This can be useful in
411              * the vhost migration code. If disconnect was caught there is an
412              * option for the general vhost code to get the dev state without
413              * knowing its type (in this case vhost-user).
414              *
415              * FIXME: this is sketchy to be reaching into vhost_dev
416              * now because we are forcing something that implies we
417              * have executed vhost_dev_stop() but that won't happen
418              * until vhost_user_blk_stop() gets called from the bh.
419              * Really this state check should be tracked locally.
420              */
421             s->dev.started = false;
422         }
423         break;
424     case CHR_EVENT_BREAK:
425     case CHR_EVENT_MUX_IN:
426     case CHR_EVENT_MUX_OUT:
427         /* Ignore */
428         break;
429     }
430 }
431 
432 static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp)
433 {
434     DeviceState *dev = &s->parent_obj.parent_obj;
435     int ret;
436 
437     s->connected = false;
438 
439     ret = qemu_chr_fe_wait_connected(&s->chardev, errp);
440     if (ret < 0) {
441         return ret;
442     }
443 
444     ret = vhost_user_blk_connect(dev, errp);
445     if (ret < 0) {
446         qemu_chr_fe_disconnect(&s->chardev);
447         return ret;
448     }
449     assert(s->connected);
450 
451     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
452                                s->parent_obj.config_len, errp);
453     if (ret < 0) {
454         qemu_chr_fe_disconnect(&s->chardev);
455         vhost_dev_cleanup(&s->dev);
456         return ret;
457     }
458 
459     return 0;
460 }
461 
462 static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
463 {
464     ERRP_GUARD();
465     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
466     VHostUserBlk *s = VHOST_USER_BLK(vdev);
467     size_t config_size;
468     int retries;
469     int i, ret;
470 
471     if (!s->chardev.chr) {
472         error_setg(errp, "chardev is mandatory");
473         return;
474     }
475 
476     if (s->num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) {
477         s->num_queues = 1;
478     }
479     if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) {
480         error_setg(errp, "invalid number of IO queues");
481         return;
482     }
483 
484     if (!s->queue_size) {
485         error_setg(errp, "queue size must be non-zero");
486         return;
487     }
488     if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
489         error_setg(errp, "queue size must not exceed %d",
490                    VIRTQUEUE_MAX_SIZE);
491         return;
492     }
493 
494     if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
495         return;
496     }
497 
498     config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
499                                          vdev->host_features);
500     virtio_init(vdev, VIRTIO_ID_BLOCK, config_size);
501 
502     s->virtqs = g_new(VirtQueue *, s->num_queues);
503     for (i = 0; i < s->num_queues; i++) {
504         s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
505                                         vhost_user_blk_handle_output);
506     }
507 
508     s->inflight = g_new0(struct vhost_inflight, 1);
509     s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
510 
511     retries = REALIZE_CONNECTION_RETRIES;
512     assert(!*errp);
513     do {
514         if (*errp) {
515             error_prepend(errp, "Reconnecting after error: ");
516             error_report_err(*errp);
517             *errp = NULL;
518         }
519         ret = vhost_user_blk_realize_connect(s, errp);
520     } while (ret < 0 && retries--);
521 
522     if (ret < 0) {
523         goto virtio_err;
524     }
525 
526     /* we're fully initialized, now we can operate, so add the handler */
527     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
528                              vhost_user_blk_event, NULL, (void *)dev,
529                              NULL, true);
530     return;
531 
532 virtio_err:
533     g_free(s->vhost_vqs);
534     s->vhost_vqs = NULL;
535     g_free(s->inflight);
536     s->inflight = NULL;
537     for (i = 0; i < s->num_queues; i++) {
538         virtio_delete_queue(s->virtqs[i]);
539     }
540     g_free(s->virtqs);
541     virtio_cleanup(vdev);
542     vhost_user_cleanup(&s->vhost_user);
543 }
544 
545 static void vhost_user_blk_device_unrealize(DeviceState *dev)
546 {
547     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
548     VHostUserBlk *s = VHOST_USER_BLK(dev);
549     int i;
550 
551     virtio_set_status(vdev, 0);
552     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
553                              NULL, NULL, NULL, false);
554     vhost_dev_cleanup(&s->dev);
555     vhost_dev_free_inflight(s->inflight);
556     g_free(s->vhost_vqs);
557     s->vhost_vqs = NULL;
558     g_free(s->inflight);
559     s->inflight = NULL;
560 
561     for (i = 0; i < s->num_queues; i++) {
562         virtio_delete_queue(s->virtqs[i]);
563     }
564     g_free(s->virtqs);
565     virtio_cleanup(vdev);
566     vhost_user_cleanup(&s->vhost_user);
567 }
568 
569 static void vhost_user_blk_instance_init(Object *obj)
570 {
571     VHostUserBlk *s = VHOST_USER_BLK(obj);
572 
573     device_add_bootindex_property(obj, &s->bootindex, "bootindex",
574                                   "/disk@0,0", DEVICE(obj));
575 }
576 
577 static struct vhost_dev *vhost_user_blk_get_vhost(VirtIODevice *vdev)
578 {
579     VHostUserBlk *s = VHOST_USER_BLK(vdev);
580     return &s->dev;
581 }
582 
583 static const VMStateDescription vmstate_vhost_user_blk = {
584     .name = "vhost-user-blk",
585     .minimum_version_id = 1,
586     .version_id = 1,
587     .fields = (VMStateField[]) {
588         VMSTATE_VIRTIO_DEVICE,
589         VMSTATE_END_OF_LIST()
590     },
591 };
592 
593 static Property vhost_user_blk_properties[] = {
594     DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev),
595     DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues,
596                        VHOST_USER_BLK_AUTO_NUM_QUEUES),
597     DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128),
598     DEFINE_PROP_BIT64("config-wce", VHostUserBlk, parent_obj.host_features,
599                       VIRTIO_BLK_F_CONFIG_WCE, true),
600     DEFINE_PROP_BIT64("discard", VHostUserBlk, parent_obj.host_features,
601                       VIRTIO_BLK_F_DISCARD, true),
602     DEFINE_PROP_BIT64("write-zeroes", VHostUserBlk, parent_obj.host_features,
603                       VIRTIO_BLK_F_WRITE_ZEROES, true),
604     DEFINE_PROP_END_OF_LIST(),
605 };
606 
607 static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
608 {
609     DeviceClass *dc = DEVICE_CLASS(klass);
610     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
611 
612     device_class_set_props(dc, vhost_user_blk_properties);
613     dc->vmsd = &vmstate_vhost_user_blk;
614     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
615     vdc->realize = vhost_user_blk_device_realize;
616     vdc->unrealize = vhost_user_blk_device_unrealize;
617     vdc->get_config = vhost_user_blk_update_config;
618     vdc->set_config = vhost_user_blk_set_config;
619     vdc->get_features = vhost_user_blk_get_features;
620     vdc->set_status = vhost_user_blk_set_status;
621     vdc->reset = vhost_user_blk_reset;
622     vdc->get_vhost = vhost_user_blk_get_vhost;
623 }
624 
625 static const TypeInfo vhost_user_blk_info = {
626     .name = TYPE_VHOST_USER_BLK,
627     .parent = TYPE_VIRTIO_DEVICE,
628     .instance_size = sizeof(VHostUserBlk),
629     .instance_init = vhost_user_blk_instance_init,
630     .class_init = vhost_user_blk_class_init,
631 };
632 
633 static void virtio_register_types(void)
634 {
635     type_register_static(&vhost_user_blk_info);
636 }
637 
638 type_init(virtio_register_types)
639