xref: /openbmc/qemu/contrib/vhost-user-blk/vhost-user-blk.c (revision 200280af0e19bfaeb9431eb0ee1ee2d8bf8d3a0a)
1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "contrib/libvhost-user/libvhost-user-glib.h"
21 #include "contrib/libvhost-user/libvhost-user.h"
22 
23 
/*
 * Trailer of a virtio-blk request as written back by this device: a single
 * status byte (one of the VIRTIO_BLK_S_* codes) located in the last "in"
 * buffer of the descriptor chain.
 */
struct virtio_blk_inhdr {
    unsigned char status;   /* VIRTIO_BLK_S_OK / _IOERR / _UNSUPP */
};
27 
28 /* vhost user block device */
29 typedef struct VubDev {
30     VugDev parent;
31     int blk_fd;
32     struct virtio_blk_config blkcfg;
33     bool enable_ro;
34     char *blk_name;
35     GMainLoop *loop;
36 } VubDev;
37 
38 typedef struct VubReq {
39     VuVirtqElement *elem;
40     int64_t sector_num;
41     size_t size;
42     struct virtio_blk_inhdr *in;
43     struct virtio_blk_outhdr *out;
44     VubDev *vdev_blk;
45     struct VuVirtq *vq;
46 } VubReq;
47 
/*
 * Sum the lengths of the first iov_cnt entries of iov (same contract as
 * iov_size() in util/iov.c).  Returns 0 when iov_cnt is 0.
 */
static size_t vub_iov_size(const struct iovec *iov,
                              const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    /* Addition is commutative, so walking backwards yields the same sum. */
    while (remaining > 0) {
        remaining--;
        total += iov[remaining].iov_len;
    }

    return total;
}
61 
62 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
63 {
64     VugDev *gdev;
65     VubDev *vdev_blk;
66 
67     assert(vu_dev);
68 
69     gdev = container_of(vu_dev, VugDev, parent);
70     vdev_blk = container_of(gdev, VubDev, parent);
71     if (buf) {
72         g_warning("vu_panic: %s", buf);
73     }
74 
75     g_main_loop_quit(vdev_blk->loop);
76 }
77 
78 static void vub_req_complete(VubReq *req)
79 {
80     VugDev *gdev = &req->vdev_blk->parent;
81     VuDev *vu_dev = &gdev->parent;
82 
83     /* IO size with 1 extra status byte */
84     vu_queue_push(vu_dev, req->vq, req->elem,
85                   req->size + 1);
86     vu_queue_notify(vu_dev, req->vq);
87 
88     if (req->elem) {
89         free(req->elem);
90     }
91 
92     g_free(req);
93 }
94 
95 static int vub_open(const char *file_name, bool wce)
96 {
97     int fd;
98     int flags = O_RDWR;
99 
100     if (!wce) {
101         flags |= O_DIRECT;
102     }
103 
104     fd = open(file_name, flags);
105     if (fd < 0) {
106         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
107                 strerror(errno));
108         return -1;
109     }
110 
111     return fd;
112 }
113 
114 static ssize_t
115 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
116 {
117     VubDev *vdev_blk = req->vdev_blk;
118     ssize_t rc;
119 
120     if (!iovcnt) {
121         fprintf(stderr, "Invalid Read IOV count\n");
122         return -1;
123     }
124 
125     req->size = vub_iov_size(iov, iovcnt);
126     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
127     if (rc < 0) {
128         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
129                 vdev_blk->blk_name, req->sector_num, req->size,
130                 strerror(errno));
131         return -1;
132     }
133 
134     return rc;
135 }
136 
137 static ssize_t
138 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
139 {
140     VubDev *vdev_blk = req->vdev_blk;
141     ssize_t rc;
142 
143     if (!iovcnt) {
144         fprintf(stderr, "Invalid Write IOV count\n");
145         return -1;
146     }
147 
148     req->size = vub_iov_size(iov, iovcnt);
149     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
150     if (rc < 0) {
151         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
152                 vdev_blk->blk_name, req->sector_num, req->size,
153                 strerror(errno));
154         return -1;
155     }
156 
157     return rc;
158 }
159 
160 static void
161 vub_flush(VubReq *req)
162 {
163     VubDev *vdev_blk = req->vdev_blk;
164 
165     fdatasync(vdev_blk->blk_fd);
166 }
167 
168 static int vub_virtio_process_req(VubDev *vdev_blk,
169                                      VuVirtq *vq)
170 {
171     VugDev *gdev = &vdev_blk->parent;
172     VuDev *vu_dev = &gdev->parent;
173     VuVirtqElement *elem;
174     uint32_t type;
175     unsigned in_num;
176     unsigned out_num;
177     VubReq *req;
178 
179     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
180     if (!elem) {
181         return -1;
182     }
183 
184     /* refer to hw/block/virtio_blk.c */
185     if (elem->out_num < 1 || elem->in_num < 1) {
186         fprintf(stderr, "virtio-blk request missing headers\n");
187         free(elem);
188         return -1;
189     }
190 
191     req = g_new0(VubReq, 1);
192     req->vdev_blk = vdev_blk;
193     req->vq = vq;
194     req->elem = elem;
195 
196     in_num = elem->in_num;
197     out_num = elem->out_num;
198 
199     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
200     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
201         fprintf(stderr, "Invalid outhdr size\n");
202         goto err;
203     }
204     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
205     out_num--;
206 
207     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
208         fprintf(stderr, "Invalid inhdr size\n");
209         goto err;
210     }
211     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
212     in_num--;
213 
214     type = le32toh(req->out->type);
215     switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
216         case VIRTIO_BLK_T_IN: {
217             ssize_t ret = 0;
218             bool is_write = type & VIRTIO_BLK_T_OUT;
219             req->sector_num = le64toh(req->out->sector);
220             if (is_write) {
221                 ret  = vub_writev(req, &elem->out_sg[1], out_num);
222             } else {
223                 ret = vub_readv(req, &elem->in_sg[0], in_num);
224             }
225             if (ret >= 0) {
226                 req->in->status = VIRTIO_BLK_S_OK;
227             } else {
228                 req->in->status = VIRTIO_BLK_S_IOERR;
229             }
230             vub_req_complete(req);
231             break;
232         }
233         case VIRTIO_BLK_T_FLUSH: {
234             vub_flush(req);
235             req->in->status = VIRTIO_BLK_S_OK;
236             vub_req_complete(req);
237             break;
238         }
239         case VIRTIO_BLK_T_GET_ID: {
240             size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
241                               VIRTIO_BLK_ID_BYTES);
242             snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
243             req->in->status = VIRTIO_BLK_S_OK;
244             req->size = elem->in_sg[0].iov_len;
245             vub_req_complete(req);
246             break;
247         }
248         default: {
249             req->in->status = VIRTIO_BLK_S_UNSUPP;
250             vub_req_complete(req);
251             break;
252         }
253     }
254 
255     return 0;
256 
257 err:
258     free(elem);
259     g_free(req);
260     return -1;
261 }
262 
263 static void vub_process_vq(VuDev *vu_dev, int idx)
264 {
265     VugDev *gdev;
266     VubDev *vdev_blk;
267     VuVirtq *vq;
268     int ret;
269 
270     if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
271         fprintf(stderr, "VQ Index out of range: %d\n", idx);
272         vub_panic_cb(vu_dev, NULL);
273         return;
274     }
275 
276     gdev = container_of(vu_dev, VugDev, parent);
277     vdev_blk = container_of(gdev, VubDev, parent);
278     assert(vdev_blk);
279 
280     vq = vu_get_queue(vu_dev, idx);
281     assert(vq);
282 
283     while (1) {
284         ret = vub_virtio_process_req(vdev_blk, vq);
285         if (ret) {
286             break;
287         }
288     }
289 }
290 
291 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
292 {
293     VuVirtq *vq;
294 
295     assert(vu_dev);
296 
297     vq = vu_get_queue(vu_dev, idx);
298     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
299 }
300 
301 static uint64_t
302 vub_get_features(VuDev *dev)
303 {
304     uint64_t features;
305     VugDev *gdev;
306     VubDev *vdev_blk;
307 
308     gdev = container_of(dev, VugDev, parent);
309     vdev_blk = container_of(gdev, VubDev, parent);
310 
311     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
312                1ull << VIRTIO_BLK_F_SEG_MAX |
313                1ull << VIRTIO_BLK_F_TOPOLOGY |
314                1ull << VIRTIO_BLK_F_BLK_SIZE |
315                1ull << VIRTIO_BLK_F_FLUSH |
316                1ull << VIRTIO_BLK_F_CONFIG_WCE |
317                1ull << VIRTIO_F_VERSION_1 |
318                1ull << VHOST_USER_F_PROTOCOL_FEATURES;
319 
320     if (vdev_blk->enable_ro) {
321         features |= 1ull << VIRTIO_BLK_F_RO;
322     }
323 
324     return features;
325 }
326 
327 static uint64_t
328 vub_get_protocol_features(VuDev *dev)
329 {
330     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
331 }
332 
333 static int
334 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
335 {
336     VugDev *gdev;
337     VubDev *vdev_blk;
338 
339     gdev = container_of(vu_dev, VugDev, parent);
340     vdev_blk = container_of(gdev, VubDev, parent);
341     memcpy(config, &vdev_blk->blkcfg, len);
342 
343     return 0;
344 }
345 
346 static int
347 vub_set_config(VuDev *vu_dev, const uint8_t *data,
348                uint32_t offset, uint32_t size, uint32_t flags)
349 {
350     VugDev *gdev;
351     VubDev *vdev_blk;
352     uint8_t wce;
353     int fd;
354 
355     /* don't support live migration */
356     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
357         return -1;
358     }
359 
360     gdev = container_of(vu_dev, VugDev, parent);
361     vdev_blk = container_of(gdev, VubDev, parent);
362 
363     if (offset != offsetof(struct virtio_blk_config, wce) ||
364         size != 1) {
365         return -1;
366     }
367 
368     wce = *data;
369     if (wce == vdev_blk->blkcfg.wce) {
370         /* Do nothing as same with old configuration */
371         return 0;
372     }
373 
374     vdev_blk->blkcfg.wce = wce;
375     fprintf(stdout, "Write Cache Policy Changed\n");
376     if (vdev_blk->blk_fd >= 0) {
377         close(vdev_blk->blk_fd);
378         vdev_blk->blk_fd = -1;
379     }
380 
381     fd = vub_open(vdev_blk->blk_name, wce);
382     if (fd < 0) {
383         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
384         vdev_blk->blk_fd = -1;
385         return -1;
386     }
387     vdev_blk->blk_fd = fd;
388 
389     return 0;
390 }
391 
392 static const VuDevIface vub_iface = {
393     .get_features = vub_get_features,
394     .queue_set_started = vub_queue_set_started,
395     .get_protocol_features = vub_get_protocol_features,
396     .get_config = vub_get_config,
397     .set_config = vub_set_config,
398 };
399 
/*
 * Create a listening UNIX stream socket bound to unix_fn.
 *
 * Any existing file at unix_fn is unlinked first.  Returns the listening
 * descriptor, or -1 on failure (socket/bind/listen errors are reported
 * with perror(); a path that does not fit in sun_path is rejected rather
 * than silently truncated).
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;
    int written;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    /* 0 is a valid descriptor; only a negative value signals failure. */
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    written = snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    if (written < 0 || (size_t)written >= sizeof(un.sun_path)) {
        /* Binding a truncated name would create a socket at the wrong path. */
        fprintf(stderr, "Socket path too long: %s\n", unix_fn);
        goto fail;
    }
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
436 
437 static void vub_free(struct VubDev *vdev_blk)
438 {
439     if (!vdev_blk) {
440         return;
441     }
442 
443     g_main_loop_unref(vdev_blk->loop);
444     if (vdev_blk->blk_fd >= 0) {
445         close(vdev_blk->blk_fd);
446     }
447     g_free(vdev_blk);
448 }
449 
/*
 * Return the logical sector size of the block device behind fd.
 *
 * On Linux, when BLKSSZGET is available, the value is queried with
 * ioctl(); in every other case (other platform, ioctl failure, e.g. fd is
 * a regular file) the default of 512 bytes is returned.
 *
 * NOTE(review): BLKSSZGET is declared in <linux/fs.h>; if no header
 * included by this file pulls it in, the ioctl branch is compiled out.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* BLKSSZGET stores an int, so query into one and convert afterwards. */
    int sector_size = 0;

    if (ioctl(fd, BLKSSZGET, &sector_size) == 0 && sector_size > 0) {
        return (uint32_t)sector_size;
    }
#else
    (void)fd;
#endif

    return blocksize;
}
463 
464 static void
465 vub_initialize_config(int fd, struct virtio_blk_config *config)
466 {
467     off64_t capacity;
468 
469     capacity = lseek64(fd, 0, SEEK_END);
470     config->capacity = capacity >> 9;
471     config->blk_size = vub_get_blocksize(fd);
472     config->size_max = 65536;
473     config->seg_max = 128 - 2;
474     config->min_io_size = 1;
475     config->opt_io_size = 1;
476     config->num_queues = 1;
477 }
478 
479 static VubDev *
480 vub_new(char *blk_file)
481 {
482     VubDev *vdev_blk;
483 
484     vdev_blk = g_new0(VubDev, 1);
485     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
486     vdev_blk->blk_fd = vub_open(blk_file, 0);
487     if (vdev_blk->blk_fd  < 0) {
488         fprintf(stderr, "Error to open block device %s\n", blk_file);
489         vub_free(vdev_blk);
490         return NULL;
491     }
492     vdev_blk->enable_ro = false;
493     vdev_blk->blkcfg.wce = 0;
494     vdev_blk->blk_name = blk_file;
495 
496     /* fill virtio_blk_config with block parameters */
497     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
498 
499     return vdev_blk;
500 }
501 
502 int main(int argc, char **argv)
503 {
504     int opt;
505     char *unix_socket = NULL;
506     char *blk_file = NULL;
507     bool enable_ro = false;
508     int lsock = -1, csock = -1;
509     VubDev *vdev_blk = NULL;
510 
511     while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
512         switch (opt) {
513         case 'b':
514             blk_file = g_strdup(optarg);
515             break;
516         case 's':
517             unix_socket = g_strdup(optarg);
518             break;
519         case 'r':
520             enable_ro = true;
521             break;
522         case 'h':
523         default:
524             printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
525                    " | -r Enable read-only ] | [ -h ]\n", argv[0]);
526             return 0;
527         }
528     }
529 
530     if (!unix_socket || !blk_file) {
531         printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
532                " | -r Enable read-only ] | [ -h ]\n", argv[0]);
533         return -1;
534     }
535 
536     lsock = unix_sock_new(unix_socket);
537     if (lsock < 0) {
538         goto err;
539     }
540 
541     csock = accept(lsock, (void *)0, (void *)0);
542     if (csock < 0) {
543         fprintf(stderr, "Accept error %s\n", strerror(errno));
544         goto err;
545     }
546 
547     vdev_blk = vub_new(blk_file);
548     if (!vdev_blk) {
549         goto err;
550     }
551     if (enable_ro) {
552         vdev_blk->enable_ro = true;
553     }
554 
555     vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
556 
557     g_main_loop_run(vdev_blk->loop);
558 
559     vug_deinit(&vdev_blk->parent);
560 
561 err:
562     vub_free(vdev_blk);
563     if (csock >= 0) {
564         close(csock);
565     }
566     if (lsock >= 0) {
567         close(lsock);
568     }
569     g_free(unix_socket);
570     g_free(blk_file);
571 
572     return 0;
573 }
574