1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "contrib/libvhost-user/libvhost-user-glib.h"
21 #include "contrib/libvhost-user/libvhost-user.h"
22 
23 #if defined(__linux__)
24 #include <linux/fs.h>
25 #include <sys/ioctl.h>
26 #endif
27 
/* Compile-time limits of this sample backend. */
enum {
    /* Queue limit handed to vug_init() in main(). */
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
31 
/*
 * Request trailer filled in by this backend: vub_virtio_process_req() maps
 * it onto the last "in" buffer of a request and stores one of the
 * VIRTIO_BLK_S_* codes in it.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
35 
/*
 * vhost user block device
 *
 * Per-device state of the backend.  The libvhost-user callbacks receive a
 * VuDev pointer and recover this structure from it with container_of().
 */
typedef struct VubDev {
    VugDev parent;                   /* embedded libvhost-user-glib device */
    int blk_fd;                      /* backing file descriptor, see vub_open() */
    struct virtio_blk_config blkcfg; /* config space served by vub_get_config() */
    bool enable_ro;                  /* advertise VIRTIO_BLK_F_RO when true */
    char *blk_name;                  /* backing file path; not freed by vub_free() */
    GMainLoop *loop;                 /* main loop, quit from vub_panic_cb() */
} VubDev;
45 
/*
 * State of one request while it is being processed; allocated in
 * vub_virtio_process_req() and released in vub_req_complete().
 */
typedef struct VubReq {
    VuVirtqElement *elem;           /* element popped from the virtqueue */
    int64_t sector_num;             /* start sector taken from the out header */
    size_t size;                    /* byte count reported (plus status) on completion */
    struct virtio_blk_inhdr *in;    /* status trailer in the last "in" buffer */
    struct virtio_blk_outhdr *out;  /* request header in the first "out" buffer */
    VubDev *vdev_blk;               /* owning device */
    struct VuVirtq *vq;             /* queue the request was popped from */
} VubReq;
55 
/*
 * Sum the lengths of the first @iov_cnt entries of @iov and return the
 * total in bytes (compare iov_size() in util/iov.c).
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    while (remaining > 0) {
        total += iov->iov_len;
        iov++;
        remaining--;
    }

    return total;
}
69 
/*
 * Copy the contents of the first @iov_cnt entries of @iov, back to back,
 * into @buf.
 *
 * @buf must be large enough for vub_iov_size(iov, iov_cnt) bytes.
 *
 * Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
    unsigned char *dst = buf;
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        /* Empty segments may carry a NULL base, which memcpy() must not see. */
        if (iov[i].iov_len == 0) {
            continue;
        }
        memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}
83 
84 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
85 {
86     VugDev *gdev;
87     VubDev *vdev_blk;
88 
89     assert(vu_dev);
90 
91     gdev = container_of(vu_dev, VugDev, parent);
92     vdev_blk = container_of(gdev, VubDev, parent);
93     if (buf) {
94         g_warning("vu_panic: %s", buf);
95     }
96 
97     g_main_loop_quit(vdev_blk->loop);
98 }
99 
100 static void vub_req_complete(VubReq *req)
101 {
102     VugDev *gdev = &req->vdev_blk->parent;
103     VuDev *vu_dev = &gdev->parent;
104 
105     /* IO size with 1 extra status byte */
106     vu_queue_push(vu_dev, req->vq, req->elem,
107                   req->size + 1);
108     vu_queue_notify(vu_dev, req->vq);
109 
110     if (req->elem) {
111         free(req->elem);
112     }
113 
114     g_free(req);
115 }
116 
/*
 * Open @file_name read/write.  When @wce (write cache enable) is false
 * the file is additionally opened with O_DIRECT.
 *
 * Returns the new file descriptor, or -1 after printing the reason.
 */
static int vub_open(const char *file_name, bool wce)
{
    const int flags = wce ? O_RDWR : (O_RDWR | O_DIRECT);
    int fd = open(file_name, flags);

    if (fd >= 0) {
        return fd;
    }

    fprintf(stderr, "Cannot open file %s, %s\n", file_name,
            strerror(errno));
    return -1;
}
135 
136 static ssize_t
137 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
138 {
139     VubDev *vdev_blk = req->vdev_blk;
140     ssize_t rc;
141 
142     if (!iovcnt) {
143         fprintf(stderr, "Invalid Read IOV count\n");
144         return -1;
145     }
146 
147     req->size = vub_iov_size(iov, iovcnt);
148     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
149     if (rc < 0) {
150         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
151                 vdev_blk->blk_name, req->sector_num, req->size,
152                 strerror(errno));
153         return -1;
154     }
155 
156     return rc;
157 }
158 
159 static ssize_t
160 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
161 {
162     VubDev *vdev_blk = req->vdev_blk;
163     ssize_t rc;
164 
165     if (!iovcnt) {
166         fprintf(stderr, "Invalid Write IOV count\n");
167         return -1;
168     }
169 
170     req->size = vub_iov_size(iov, iovcnt);
171     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
172     if (rc < 0) {
173         fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
174                 vdev_blk->blk_name, req->sector_num, req->size,
175                 strerror(errno));
176         return -1;
177     }
178 
179     return rc;
180 }
181 
182 static int
183 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
184                          uint32_t type)
185 {
186     struct virtio_blk_discard_write_zeroes *desc;
187     ssize_t size;
188     void *buf;
189 
190     size = vub_iov_size(iov, iovcnt);
191     if (size != sizeof(*desc)) {
192         fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
193         return -1;
194     }
195     buf = g_new0(char, size);
196     vub_iov_to_buf(iov, iovcnt, buf);
197 
198     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
199     VubDev *vdev_blk = req->vdev_blk;
200     desc = (struct virtio_blk_discard_write_zeroes *)buf;
201     uint64_t range[2] = { le64toh(desc->sector) << 9,
202                           le32toh(desc->num_sectors) << 9 };
203     if (type == VIRTIO_BLK_T_DISCARD) {
204         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
205             g_free(buf);
206             return 0;
207         }
208     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
209         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
210             g_free(buf);
211             return 0;
212         }
213     }
214     #endif
215 
216     g_free(buf);
217     return -1;
218 }
219 
220 static void
221 vub_flush(VubReq *req)
222 {
223     VubDev *vdev_blk = req->vdev_blk;
224 
225     fdatasync(vdev_blk->blk_fd);
226 }
227 
228 static int vub_virtio_process_req(VubDev *vdev_blk,
229                                      VuVirtq *vq)
230 {
231     VugDev *gdev = &vdev_blk->parent;
232     VuDev *vu_dev = &gdev->parent;
233     VuVirtqElement *elem;
234     uint32_t type;
235     unsigned in_num;
236     unsigned out_num;
237     VubReq *req;
238 
239     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
240     if (!elem) {
241         return -1;
242     }
243 
244     /* refer to hw/block/virtio_blk.c */
245     if (elem->out_num < 1 || elem->in_num < 1) {
246         fprintf(stderr, "virtio-blk request missing headers\n");
247         free(elem);
248         return -1;
249     }
250 
251     req = g_new0(VubReq, 1);
252     req->vdev_blk = vdev_blk;
253     req->vq = vq;
254     req->elem = elem;
255 
256     in_num = elem->in_num;
257     out_num = elem->out_num;
258 
259     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
260     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
261         fprintf(stderr, "Invalid outhdr size\n");
262         goto err;
263     }
264     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
265     out_num--;
266 
267     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
268         fprintf(stderr, "Invalid inhdr size\n");
269         goto err;
270     }
271     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
272     in_num--;
273 
274     type = le32toh(req->out->type);
275     switch (type & ~VIRTIO_BLK_T_BARRIER) {
276     case VIRTIO_BLK_T_IN:
277     case VIRTIO_BLK_T_OUT: {
278         ssize_t ret = 0;
279         bool is_write = type & VIRTIO_BLK_T_OUT;
280         req->sector_num = le64toh(req->out->sector);
281         if (is_write) {
282             ret  = vub_writev(req, &elem->out_sg[1], out_num);
283         } else {
284             ret = vub_readv(req, &elem->in_sg[0], in_num);
285         }
286         if (ret >= 0) {
287             req->in->status = VIRTIO_BLK_S_OK;
288         } else {
289             req->in->status = VIRTIO_BLK_S_IOERR;
290         }
291         vub_req_complete(req);
292         break;
293     }
294     case VIRTIO_BLK_T_FLUSH:
295         vub_flush(req);
296         req->in->status = VIRTIO_BLK_S_OK;
297         vub_req_complete(req);
298         break;
299     case VIRTIO_BLK_T_GET_ID: {
300         size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
301                           VIRTIO_BLK_ID_BYTES);
302         snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
303         req->in->status = VIRTIO_BLK_S_OK;
304         req->size = elem->in_sg[0].iov_len;
305         vub_req_complete(req);
306         break;
307     }
308     case VIRTIO_BLK_T_DISCARD:
309     case VIRTIO_BLK_T_WRITE_ZEROES: {
310         int rc;
311         rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
312         if (rc == 0) {
313             req->in->status = VIRTIO_BLK_S_OK;
314         } else {
315             req->in->status = VIRTIO_BLK_S_IOERR;
316         }
317         vub_req_complete(req);
318         break;
319     }
320     default:
321         req->in->status = VIRTIO_BLK_S_UNSUPP;
322         vub_req_complete(req);
323         break;
324     }
325 
326     return 0;
327 
328 err:
329     free(elem);
330     g_free(req);
331     return -1;
332 }
333 
334 static void vub_process_vq(VuDev *vu_dev, int idx)
335 {
336     VugDev *gdev;
337     VubDev *vdev_blk;
338     VuVirtq *vq;
339     int ret;
340 
341     gdev = container_of(vu_dev, VugDev, parent);
342     vdev_blk = container_of(gdev, VubDev, parent);
343     assert(vdev_blk);
344 
345     vq = vu_get_queue(vu_dev, idx);
346     assert(vq);
347 
348     while (1) {
349         ret = vub_virtio_process_req(vdev_blk, vq);
350         if (ret) {
351             break;
352         }
353     }
354 }
355 
356 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
357 {
358     VuVirtq *vq;
359 
360     assert(vu_dev);
361 
362     vq = vu_get_queue(vu_dev, idx);
363     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
364 }
365 
366 static uint64_t
367 vub_get_features(VuDev *dev)
368 {
369     uint64_t features;
370     VugDev *gdev;
371     VubDev *vdev_blk;
372 
373     gdev = container_of(dev, VugDev, parent);
374     vdev_blk = container_of(gdev, VubDev, parent);
375 
376     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
377                1ull << VIRTIO_BLK_F_SEG_MAX |
378                1ull << VIRTIO_BLK_F_TOPOLOGY |
379                1ull << VIRTIO_BLK_F_BLK_SIZE |
380                1ull << VIRTIO_BLK_F_FLUSH |
381                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
382                1ull << VIRTIO_BLK_F_DISCARD |
383                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
384                #endif
385                1ull << VIRTIO_BLK_F_CONFIG_WCE;
386 
387     if (vdev_blk->enable_ro) {
388         features |= 1ull << VIRTIO_BLK_F_RO;
389     }
390 
391     return features;
392 }
393 
394 static uint64_t
395 vub_get_protocol_features(VuDev *dev)
396 {
397     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
398            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
399 }
400 
401 static int
402 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
403 {
404     VugDev *gdev;
405     VubDev *vdev_blk;
406 
407     g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);
408 
409     gdev = container_of(vu_dev, VugDev, parent);
410     vdev_blk = container_of(gdev, VubDev, parent);
411     memcpy(config, &vdev_blk->blkcfg, len);
412 
413     return 0;
414 }
415 
416 static int
417 vub_set_config(VuDev *vu_dev, const uint8_t *data,
418                uint32_t offset, uint32_t size, uint32_t flags)
419 {
420     VugDev *gdev;
421     VubDev *vdev_blk;
422     uint8_t wce;
423     int fd;
424 
425     /* don't support live migration */
426     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
427         return -1;
428     }
429 
430     gdev = container_of(vu_dev, VugDev, parent);
431     vdev_blk = container_of(gdev, VubDev, parent);
432 
433     if (offset != offsetof(struct virtio_blk_config, wce) ||
434         size != 1) {
435         return -1;
436     }
437 
438     wce = *data;
439     if (wce == vdev_blk->blkcfg.wce) {
440         /* Do nothing as same with old configuration */
441         return 0;
442     }
443 
444     vdev_blk->blkcfg.wce = wce;
445     fprintf(stdout, "Write Cache Policy Changed\n");
446     if (vdev_blk->blk_fd >= 0) {
447         close(vdev_blk->blk_fd);
448         vdev_blk->blk_fd = -1;
449     }
450 
451     fd = vub_open(vdev_blk->blk_name, wce);
452     if (fd < 0) {
453         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
454         vdev_blk->blk_fd = -1;
455         return -1;
456     }
457     vdev_blk->blk_fd = fd;
458 
459     return 0;
460 }
461 
462 static const VuDevIface vub_iface = {
463     .get_features = vub_get_features,
464     .queue_set_started = vub_queue_set_started,
465     .get_protocol_features = vub_get_protocol_features,
466     .get_config = vub_get_config,
467     .set_config = vub_set_config,
468 };
469 
/*
 * Create a listening UNIX stream socket bound to the path @unix_fn.
 * Anything already present at that path is unlinked first.
 *
 * Returns the listening socket, or -1 when the path does not fit into
 * sockaddr_un or one of the socket calls fails.
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;

    assert(unix_fn);

    /*
     * Refuse over-long paths instead of truncating them: a truncated path
     * would bind the socket somewhere other than where the caller asked,
     * and other than the path that gets unlinked below.
     */
    if (strlen(unix_fn) >= sizeof(un.sun_path)) {
        fprintf(stderr, "Socket path %s is too long\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);

    (void)unlink(unix_fn);
    /* The structure is zeroed, so passing its full size is well defined. */
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
506 
507 static void vub_free(struct VubDev *vdev_blk)
508 {
509     if (!vdev_blk) {
510         return;
511     }
512 
513     g_main_loop_unref(vdev_blk->loop);
514     if (vdev_blk->blk_fd >= 0) {
515         close(vdev_blk->blk_fd);
516     }
517     g_free(vdev_blk);
518 }
519 
/*
 * Return the block size to report for @fd.  The value starts out as 512;
 * where the BLKSSZGET ioctl is available it is given the chance to store
 * a different value.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t sector_bytes = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /*
     * The result of the ioctl is not needed: whatever sector_bytes holds
     * afterwards is returned either way.
     */
    (void)ioctl(fd, BLKSSZGET, &sector_bytes);
#endif

    return sector_bytes;
}
533 
534 static void
535 vub_initialize_config(int fd, struct virtio_blk_config *config)
536 {
537     off64_t capacity;
538 
539     capacity = lseek64(fd, 0, SEEK_END);
540     config->capacity = capacity >> 9;
541     config->blk_size = vub_get_blocksize(fd);
542     config->size_max = 65536;
543     config->seg_max = 128 - 2;
544     config->min_io_size = 1;
545     config->opt_io_size = 1;
546     config->num_queues = 1;
547     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
548     config->max_discard_sectors = 32768;
549     config->max_discard_seg = 1;
550     config->discard_sector_alignment = config->blk_size >> 9;
551     config->max_write_zeroes_sectors = 32768;
552     config->max_write_zeroes_seg = 1;
553     #endif
554 }
555 
556 static VubDev *
557 vub_new(char *blk_file)
558 {
559     VubDev *vdev_blk;
560 
561     vdev_blk = g_new0(VubDev, 1);
562     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
563     vdev_blk->blk_fd = vub_open(blk_file, 0);
564     if (vdev_blk->blk_fd  < 0) {
565         fprintf(stderr, "Error to open block device %s\n", blk_file);
566         vub_free(vdev_blk);
567         return NULL;
568     }
569     vdev_blk->enable_ro = false;
570     vdev_blk->blkcfg.wce = 0;
571     vdev_blk->blk_name = blk_file;
572 
573     /* fill virtio_blk_config with block parameters */
574     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
575 
576     return vdev_blk;
577 }
578 
579 static int opt_fdnum = -1;
580 static char *opt_socket_path;
581 static char *opt_blk_file;
582 static gboolean opt_print_caps;
583 static gboolean opt_read_only;
584 
585 static GOptionEntry entries[] = {
586     { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
587       "Print capabilities", NULL },
588     { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
589       "Use inherited fd socket", "FDNUM" },
590     { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
591       "Use UNIX socket path", "PATH" },
592     {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
593      "block device or file path", "PATH"},
594     { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
595       "Enable read-only", NULL }
596 };
597 
598 int main(int argc, char **argv)
599 {
600     int lsock = -1, csock = -1;
601     VubDev *vdev_blk = NULL;
602     GError *error = NULL;
603     GOptionContext *context;
604 
605     context = g_option_context_new(NULL);
606     g_option_context_add_main_entries(context, entries, NULL);
607     if (!g_option_context_parse(context, &argc, &argv, &error)) {
608         g_printerr("Option parsing failed: %s\n", error->message);
609         exit(EXIT_FAILURE);
610     }
611     if (opt_print_caps) {
612         g_print("{\n");
613         g_print("  \"type\": \"block\",\n");
614         g_print("  \"features\": [\n");
615         g_print("    \"read-only\",\n");
616         g_print("    \"blk-file\"\n");
617         g_print("  ]\n");
618         g_print("}\n");
619         exit(EXIT_SUCCESS);
620     }
621 
622     if (!opt_blk_file) {
623         g_print("%s\n", g_option_context_get_help(context, true, NULL));
624         exit(EXIT_FAILURE);
625     }
626 
627     if (opt_socket_path) {
628         lsock = unix_sock_new(opt_socket_path);
629         if (lsock < 0) {
630             exit(EXIT_FAILURE);
631         }
632     } else if (opt_fdnum < 0) {
633         g_print("%s\n", g_option_context_get_help(context, true, NULL));
634         exit(EXIT_FAILURE);
635     } else {
636         lsock = opt_fdnum;
637     }
638 
639     csock = accept(lsock, NULL, NULL);
640     if (csock < 0) {
641         g_printerr("Accept error %s\n", strerror(errno));
642         exit(EXIT_FAILURE);
643     }
644 
645     vdev_blk = vub_new(opt_blk_file);
646     if (!vdev_blk) {
647         exit(EXIT_FAILURE);
648     }
649     if (opt_read_only) {
650         vdev_blk->enable_ro = true;
651     }
652 
653     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
654                   vub_panic_cb, &vub_iface)) {
655         g_printerr("Failed to initialize libvhost-user-glib\n");
656         exit(EXIT_FAILURE);
657     }
658 
659     g_main_loop_run(vdev_blk->loop);
660     g_main_loop_unref(vdev_blk->loop);
661     g_option_context_free(context);
662     vug_deinit(&vdev_blk->parent);
663     vub_free(vdev_blk);
664     if (csock >= 0) {
665         close(csock);
666     }
667     if (lsock >= 0) {
668         close(lsock);
669     }
670     g_free(opt_socket_path);
671     g_free(opt_blk_file);
672 
673     return 0;
674 }
675