xref: /openbmc/qemu/hw/block/vhost-user-blk.c (revision b83a80e8)
1 /*
2  * vhost-user-blk host device
3  *
4  * Copyright(C) 2017 Intel Corporation.
5  *
6  * Authors:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
10  * Felipe Franciosi <felipe@nutanix.com>
11  * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
12  * Nicholas Bellinger <nab@risingtidesystems.com>
13  *
14  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
15  * See the COPYING.LIB file in the top-level directory.
16  *
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
21 #include "qemu/error-report.h"
22 #include "qemu/cutils.h"
23 #include "hw/qdev-core.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/qdev-properties-system.h"
26 #include "hw/virtio/vhost.h"
27 #include "hw/virtio/vhost-user-blk.h"
28 #include "hw/virtio/virtio.h"
29 #include "hw/virtio/virtio-bus.h"
30 #include "hw/virtio/virtio-access.h"
31 #include "sysemu/sysemu.h"
32 #include "sysemu/runstate.h"
33 
34 #define REALIZE_CONNECTION_RETRIES 3
35 
36 static const int user_feature_bits[] = {
37     VIRTIO_BLK_F_SIZE_MAX,
38     VIRTIO_BLK_F_SEG_MAX,
39     VIRTIO_BLK_F_GEOMETRY,
40     VIRTIO_BLK_F_BLK_SIZE,
41     VIRTIO_BLK_F_TOPOLOGY,
42     VIRTIO_BLK_F_MQ,
43     VIRTIO_BLK_F_RO,
44     VIRTIO_BLK_F_FLUSH,
45     VIRTIO_BLK_F_CONFIG_WCE,
46     VIRTIO_BLK_F_DISCARD,
47     VIRTIO_BLK_F_WRITE_ZEROES,
48     VIRTIO_F_VERSION_1,
49     VIRTIO_RING_F_INDIRECT_DESC,
50     VIRTIO_RING_F_EVENT_IDX,
51     VIRTIO_F_NOTIFY_ON_EMPTY,
52     VIRTIO_F_RING_PACKED,
53     VIRTIO_F_IOMMU_PLATFORM,
54     VHOST_INVALID_FEATURE_BIT
55 };
56 
57 static void vhost_user_blk_event(void *opaque, QEMUChrEvent event);
58 
59 static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
60 {
61     VHostUserBlk *s = VHOST_USER_BLK(vdev);
62 
63     /* Our num_queues overrides the device backend */
64     virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues);
65 
66     memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
67 }
68 
69 static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
70 {
71     VHostUserBlk *s = VHOST_USER_BLK(vdev);
72     struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
73     int ret;
74 
75     if (blkcfg->wce == s->blkcfg.wce) {
76         return;
77     }
78 
79     ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
80                                offsetof(struct virtio_blk_config, wce),
81                                sizeof(blkcfg->wce),
82                                VHOST_SET_CONFIG_TYPE_MASTER);
83     if (ret) {
84         error_report("set device config space failed");
85         return;
86     }
87 
88     s->blkcfg.wce = blkcfg->wce;
89 }
90 
91 static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
92 {
93     int ret;
94     struct virtio_blk_config blkcfg;
95     VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
96     Error *local_err = NULL;
97 
98     ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg,
99                                sizeof(struct virtio_blk_config),
100                                &local_err);
101     if (ret < 0) {
102         error_report_err(local_err);
103         return ret;
104     }
105 
106     /* valid for resize only */
107     if (blkcfg.capacity != s->blkcfg.capacity) {
108         s->blkcfg.capacity = blkcfg.capacity;
109         memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config));
110         virtio_notify_config(dev->vdev);
111     }
112 
113     return 0;
114 }
115 
116 const VhostDevConfigOps blk_ops = {
117     .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
118 };
119 
120 static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp)
121 {
122     VHostUserBlk *s = VHOST_USER_BLK(vdev);
123     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
124     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
125     int i, ret;
126 
127     if (!k->set_guest_notifiers) {
128         error_setg(errp, "binding does not support guest notifiers");
129         return -ENOSYS;
130     }
131 
132     ret = vhost_dev_enable_notifiers(&s->dev, vdev);
133     if (ret < 0) {
134         error_setg_errno(errp, -ret, "Error enabling host notifiers");
135         return ret;
136     }
137 
138     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
139     if (ret < 0) {
140         error_setg_errno(errp, -ret, "Error binding guest notifier");
141         goto err_host_notifiers;
142     }
143 
144     s->dev.acked_features = vdev->guest_features;
145 
146     ret = vhost_dev_prepare_inflight(&s->dev, vdev);
147     if (ret < 0) {
148         error_setg_errno(errp, -ret, "Error setting inflight format");
149         goto err_guest_notifiers;
150     }
151 
152     if (!s->inflight->addr) {
153         ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight);
154         if (ret < 0) {
155             error_setg_errno(errp, -ret, "Error getting inflight");
156             goto err_guest_notifiers;
157         }
158     }
159 
160     ret = vhost_dev_set_inflight(&s->dev, s->inflight);
161     if (ret < 0) {
162         error_setg_errno(errp, -ret, "Error setting inflight");
163         goto err_guest_notifiers;
164     }
165 
166     ret = vhost_dev_start(&s->dev, vdev);
167     if (ret < 0) {
168         error_setg_errno(errp, -ret, "Error starting vhost");
169         goto err_guest_notifiers;
170     }
171     s->started_vu = true;
172 
173     /* guest_notifier_mask/pending not used yet, so just unmask
174      * everything here. virtio-pci will do the right thing by
175      * enabling/disabling irqfd.
176      */
177     for (i = 0; i < s->dev.nvqs; i++) {
178         vhost_virtqueue_mask(&s->dev, vdev, i, false);
179     }
180 
181     return ret;
182 
183 err_guest_notifiers:
184     k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
185 err_host_notifiers:
186     vhost_dev_disable_notifiers(&s->dev, vdev);
187     return ret;
188 }
189 
190 static void vhost_user_blk_stop(VirtIODevice *vdev)
191 {
192     VHostUserBlk *s = VHOST_USER_BLK(vdev);
193     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
194     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
195     int ret;
196 
197     if (!s->started_vu) {
198         return;
199     }
200     s->started_vu = false;
201 
202     if (!k->set_guest_notifiers) {
203         return;
204     }
205 
206     vhost_dev_stop(&s->dev, vdev);
207 
208     ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
209     if (ret < 0) {
210         error_report("vhost guest notifier cleanup failed: %d", ret);
211         return;
212     }
213 
214     vhost_dev_disable_notifiers(&s->dev, vdev);
215 }
216 
217 static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
218 {
219     VHostUserBlk *s = VHOST_USER_BLK(vdev);
220     bool should_start = virtio_device_started(vdev, status);
221     Error *local_err = NULL;
222     int ret;
223 
224     if (!vdev->vm_running) {
225         should_start = false;
226     }
227 
228     if (!s->connected) {
229         return;
230     }
231 
232     if (s->dev.started == should_start) {
233         return;
234     }
235 
236     if (should_start) {
237         ret = vhost_user_blk_start(vdev, &local_err);
238         if (ret < 0) {
239             error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
240             qemu_chr_fe_disconnect(&s->chardev);
241         }
242     } else {
243         vhost_user_blk_stop(vdev);
244     }
245 
246 }
247 
248 static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
249                                             uint64_t features,
250                                             Error **errp)
251 {
252     VHostUserBlk *s = VHOST_USER_BLK(vdev);
253 
254     /* Turn on pre-defined features */
255     virtio_add_feature(&features, VIRTIO_BLK_F_SIZE_MAX);
256     virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
257     virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
258     virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
259     virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
260     virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
261     virtio_add_feature(&features, VIRTIO_BLK_F_RO);
262     virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
263     virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
264 
265     if (s->config_wce) {
266         virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
267     }
268     if (s->num_queues > 1) {
269         virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
270     }
271 
272     return vhost_get_features(&s->dev, user_feature_bits, features);
273 }
274 
275 static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
276 {
277     VHostUserBlk *s = VHOST_USER_BLK(vdev);
278     Error *local_err = NULL;
279     int i, ret;
280 
281     if (!vdev->start_on_kick) {
282         return;
283     }
284 
285     if (!s->connected) {
286         return;
287     }
288 
289     if (s->dev.started) {
290         return;
291     }
292 
293     /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
294      * vhost here instead of waiting for .set_status().
295      */
296     ret = vhost_user_blk_start(vdev, &local_err);
297     if (ret < 0) {
298         error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
299         qemu_chr_fe_disconnect(&s->chardev);
300         return;
301     }
302 
303     /* Kick right away to begin processing requests already in vring */
304     for (i = 0; i < s->dev.nvqs; i++) {
305         VirtQueue *kick_vq = virtio_get_queue(vdev, i);
306 
307         if (!virtio_queue_get_desc_addr(vdev, i)) {
308             continue;
309         }
310         event_notifier_set(virtio_queue_get_host_notifier(kick_vq));
311     }
312 }
313 
314 static void vhost_user_blk_reset(VirtIODevice *vdev)
315 {
316     VHostUserBlk *s = VHOST_USER_BLK(vdev);
317 
318     vhost_dev_free_inflight(s->inflight);
319 }
320 
321 static int vhost_user_blk_connect(DeviceState *dev, Error **errp)
322 {
323     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
324     VHostUserBlk *s = VHOST_USER_BLK(vdev);
325     int ret = 0;
326 
327     if (s->connected) {
328         return 0;
329     }
330     s->connected = true;
331 
332     s->dev.num_queues = s->num_queues;
333     s->dev.nvqs = s->num_queues;
334     s->dev.vqs = s->vhost_vqs;
335     s->dev.vq_index = 0;
336     s->dev.backend_features = 0;
337 
338     vhost_dev_set_config_notifier(&s->dev, &blk_ops);
339 
340     ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
341                          errp);
342     if (ret < 0) {
343         return ret;
344     }
345 
346     /* restore vhost state */
347     if (virtio_device_started(vdev, vdev->status)) {
348         ret = vhost_user_blk_start(vdev, errp);
349         if (ret < 0) {
350             return ret;
351         }
352     }
353 
354     return 0;
355 }
356 
357 static void vhost_user_blk_disconnect(DeviceState *dev)
358 {
359     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
360     VHostUserBlk *s = VHOST_USER_BLK(vdev);
361 
362     if (!s->connected) {
363         return;
364     }
365     s->connected = false;
366 
367     vhost_user_blk_stop(vdev);
368 
369     vhost_dev_cleanup(&s->dev);
370 }
371 
372 static void vhost_user_blk_chr_closed_bh(void *opaque)
373 {
374     DeviceState *dev = opaque;
375     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
376     VHostUserBlk *s = VHOST_USER_BLK(vdev);
377 
378     vhost_user_blk_disconnect(dev);
379     qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
380                              NULL, opaque, NULL, true);
381 }
382 
383 static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
384 {
385     DeviceState *dev = opaque;
386     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
387     VHostUserBlk *s = VHOST_USER_BLK(vdev);
388     Error *local_err = NULL;
389 
390     switch (event) {
391     case CHR_EVENT_OPENED:
392         if (vhost_user_blk_connect(dev, &local_err) < 0) {
393             error_report_err(local_err);
394             qemu_chr_fe_disconnect(&s->chardev);
395             return;
396         }
397         break;
398     case CHR_EVENT_CLOSED:
399         if (!runstate_check(RUN_STATE_SHUTDOWN)) {
400             /*
401              * A close event may happen during a read/write, but vhost
402              * code assumes the vhost_dev remains setup, so delay the
403              * stop & clear.
404              */
405             AioContext *ctx = qemu_get_current_aio_context();
406 
407             qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL,
408                     NULL, NULL, false);
409             aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque);
410 
411             /*
412              * Move vhost device to the stopped state. The vhost-user device
413              * will be clean up and disconnected in BH. This can be useful in
414              * the vhost migration code. If disconnect was caught there is an
415              * option for the general vhost code to get the dev state without
416              * knowing its type (in this case vhost-user).
417              */
418             s->dev.started = false;
419         }
420         break;
421     case CHR_EVENT_BREAK:
422     case CHR_EVENT_MUX_IN:
423     case CHR_EVENT_MUX_OUT:
424         /* Ignore */
425         break;
426     }
427 }
428 
429 static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp)
430 {
431     DeviceState *dev = &s->parent_obj.parent_obj;
432     int ret;
433 
434     s->connected = false;
435 
436     ret = qemu_chr_fe_wait_connected(&s->chardev, errp);
437     if (ret < 0) {
438         return ret;
439     }
440 
441     ret = vhost_user_blk_connect(dev, errp);
442     if (ret < 0) {
443         qemu_chr_fe_disconnect(&s->chardev);
444         return ret;
445     }
446     assert(s->connected);
447 
448     ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
449                                sizeof(struct virtio_blk_config), errp);
450     if (ret < 0) {
451         qemu_chr_fe_disconnect(&s->chardev);
452         vhost_dev_cleanup(&s->dev);
453         return ret;
454     }
455 
456     return 0;
457 }
458 
459 static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
460 {
461     ERRP_GUARD();
462     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
463     VHostUserBlk *s = VHOST_USER_BLK(vdev);
464     int retries;
465     int i, ret;
466 
467     if (!s->chardev.chr) {
468         error_setg(errp, "chardev is mandatory");
469         return;
470     }
471 
472     if (s->num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) {
473         s->num_queues = 1;
474     }
475     if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) {
476         error_setg(errp, "invalid number of IO queues");
477         return;
478     }
479 
480     if (!s->queue_size) {
481         error_setg(errp, "queue size must be non-zero");
482         return;
483     }
484     if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
485         error_setg(errp, "queue size must not exceed %d",
486                    VIRTQUEUE_MAX_SIZE);
487         return;
488     }
489 
490     if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
491         return;
492     }
493 
494     virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
495                 sizeof(struct virtio_blk_config));
496 
497     s->virtqs = g_new(VirtQueue *, s->num_queues);
498     for (i = 0; i < s->num_queues; i++) {
499         s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
500                                         vhost_user_blk_handle_output);
501     }
502 
503     s->inflight = g_new0(struct vhost_inflight, 1);
504     s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
505 
506     retries = REALIZE_CONNECTION_RETRIES;
507     assert(!*errp);
508     do {
509         if (*errp) {
510             error_prepend(errp, "Reconnecting after error: ");
511             error_report_err(*errp);
512             *errp = NULL;
513         }
514         ret = vhost_user_blk_realize_connect(s, errp);
515     } while (ret < 0 && retries--);
516 
517     if (ret < 0) {
518         goto virtio_err;
519     }
520 
521     /* we're fully initialized, now we can operate, so add the handler */
522     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL,
523                              vhost_user_blk_event, NULL, (void *)dev,
524                              NULL, true);
525     return;
526 
527 virtio_err:
528     g_free(s->vhost_vqs);
529     s->vhost_vqs = NULL;
530     g_free(s->inflight);
531     s->inflight = NULL;
532     for (i = 0; i < s->num_queues; i++) {
533         virtio_delete_queue(s->virtqs[i]);
534     }
535     g_free(s->virtqs);
536     virtio_cleanup(vdev);
537     vhost_user_cleanup(&s->vhost_user);
538 }
539 
540 static void vhost_user_blk_device_unrealize(DeviceState *dev)
541 {
542     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
543     VHostUserBlk *s = VHOST_USER_BLK(dev);
544     int i;
545 
546     virtio_set_status(vdev, 0);
547     qemu_chr_fe_set_handlers(&s->chardev,  NULL, NULL, NULL,
548                              NULL, NULL, NULL, false);
549     vhost_dev_cleanup(&s->dev);
550     vhost_dev_free_inflight(s->inflight);
551     g_free(s->vhost_vqs);
552     s->vhost_vqs = NULL;
553     g_free(s->inflight);
554     s->inflight = NULL;
555 
556     for (i = 0; i < s->num_queues; i++) {
557         virtio_delete_queue(s->virtqs[i]);
558     }
559     g_free(s->virtqs);
560     virtio_cleanup(vdev);
561     vhost_user_cleanup(&s->vhost_user);
562 }
563 
564 static void vhost_user_blk_instance_init(Object *obj)
565 {
566     VHostUserBlk *s = VHOST_USER_BLK(obj);
567 
568     device_add_bootindex_property(obj, &s->bootindex, "bootindex",
569                                   "/disk@0,0", DEVICE(obj));
570 }
571 
572 static const VMStateDescription vmstate_vhost_user_blk = {
573     .name = "vhost-user-blk",
574     .minimum_version_id = 1,
575     .version_id = 1,
576     .fields = (VMStateField[]) {
577         VMSTATE_VIRTIO_DEVICE,
578         VMSTATE_END_OF_LIST()
579     },
580 };
581 
582 static Property vhost_user_blk_properties[] = {
583     DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev),
584     DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues,
585                        VHOST_USER_BLK_AUTO_NUM_QUEUES),
586     DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128),
587     DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true),
588     DEFINE_PROP_END_OF_LIST(),
589 };
590 
591 static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
592 {
593     DeviceClass *dc = DEVICE_CLASS(klass);
594     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
595 
596     device_class_set_props(dc, vhost_user_blk_properties);
597     dc->vmsd = &vmstate_vhost_user_blk;
598     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
599     vdc->realize = vhost_user_blk_device_realize;
600     vdc->unrealize = vhost_user_blk_device_unrealize;
601     vdc->get_config = vhost_user_blk_update_config;
602     vdc->set_config = vhost_user_blk_set_config;
603     vdc->get_features = vhost_user_blk_get_features;
604     vdc->set_status = vhost_user_blk_set_status;
605     vdc->reset = vhost_user_blk_reset;
606 }
607 
608 static const TypeInfo vhost_user_blk_info = {
609     .name = TYPE_VHOST_USER_BLK,
610     .parent = TYPE_VIRTIO_DEVICE,
611     .instance_size = sizeof(VHostUserBlk),
612     .instance_init = vhost_user_blk_instance_init,
613     .class_init = vhost_user_blk_class_init,
614 };
615 
616 static void virtio_register_types(void)
617 {
618     type_register_static(&vhost_user_blk_info);
619 }
620 
621 type_init(virtio_register_types)
622