1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "libvhost-user-glib.h"
21 
22 #if defined(__linux__)
23 #include <linux/fs.h>
24 #include <sys/ioctl.h>
25 #endif
26 
/* Compile-time limits of this sample backend. */
enum {
    /* Queue count handed to vug_init() when main() sets the device up. */
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
30 
/*
 * Device-written trailer of a request.  vub_virtio_process_req() maps it onto
 * the last "in" descriptor of an element and stores a VIRTIO_BLK_S_* code in
 * it before completing the request.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
34 
/*
 * vhost user block device
 *
 * "parent" has to stay embedded (not a pointer): the callbacks recover the
 * VubDev from the VuDev they are given through container_of().
 */
typedef struct VubDev {
    VugDev parent;
    /* Descriptor of the backing file/device; -1 when none is open. */
    int blk_fd;
    /* Config space copied out to the master by vub_get_config(). */
    struct virtio_blk_config blkcfg;
    /* When set, VIRTIO_BLK_F_RO is added to the offered features. */
    bool enable_ro;
    /* Path of the backing file; borrowed from the caller of vub_new(). */
    char *blk_name;
    /* Main loop run by main() and stopped by vub_panic_cb(). */
    GMainLoop *loop;
} VubDev;
44 
/* Per-request state, allocated in vub_virtio_process_req(). */
typedef struct VubReq {
    /* Element popped from the virtqueue; freed in vub_req_complete(). */
    VuVirtqElement *elem;
    /* Start sector taken from the out header (multiplied by 512 for I/O). */
    int64_t sector_num;
    /* Payload byte count; vub_req_complete() pushes size + 1 (status byte). */
    size_t size;
    /* Status trailer located in the last "in" descriptor. */
    struct virtio_blk_inhdr *in;
    /* Request header located in the first "out" descriptor. */
    struct virtio_blk_outhdr *out;
    /* Owning device. */
    VubDev *vdev_blk;
    /* Queue the element was popped from and is pushed back to. */
    struct VuVirtq *vq;
} VubReq;
54 
55 /* refer util/iov.c */
/*
 * Sum the lengths of the first iov_cnt segments of iov (modelled on
 * util/iov.c).  Returns 0 when iov_cnt is 0.
 */
static size_t vub_iov_size(const struct iovec *iov,
                              const unsigned int iov_cnt)
{
    unsigned int remaining = iov_cnt;
    size_t total = 0;

    while (remaining > 0) {
        total += iov->iov_len;
        iov++;
        remaining--;
    }

    return total;
}
68 
/*
 * Flatten the first iov_cnt segments of iov into buf, back to back.
 *
 * buf must have room for vub_iov_size(iov, iov_cnt) bytes; no bound is
 * checked here.  Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
    unsigned char *dst = buf;
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        /* An empty segment may carry a NULL base, which memcpy must not see. */
        if (iov[i].iov_len == 0) {
            continue;
        }
        memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}
82 
83 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
84 {
85     VugDev *gdev;
86     VubDev *vdev_blk;
87 
88     assert(vu_dev);
89 
90     gdev = container_of(vu_dev, VugDev, parent);
91     vdev_blk = container_of(gdev, VubDev, parent);
92     if (buf) {
93         g_warning("vu_panic: %s", buf);
94     }
95 
96     g_main_loop_quit(vdev_blk->loop);
97 }
98 
99 static void vub_req_complete(VubReq *req)
100 {
101     VugDev *gdev = &req->vdev_blk->parent;
102     VuDev *vu_dev = &gdev->parent;
103 
104     /* IO size with 1 extra status byte */
105     vu_queue_push(vu_dev, req->vq, req->elem,
106                   req->size + 1);
107     vu_queue_notify(vu_dev, req->vq);
108 
109     if (req->elem) {
110         free(req->elem);
111     }
112 
113     g_free(req);
114 }
115 
116 static int vub_open(const char *file_name, bool wce)
117 {
118     int fd;
119     int flags = O_RDWR;
120 
121     if (!wce) {
122         flags |= O_DIRECT;
123     }
124 
125     fd = open(file_name, flags);
126     if (fd < 0) {
127         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
128                 strerror(errno));
129         return -1;
130     }
131 
132     return fd;
133 }
134 
135 static ssize_t
136 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
137 {
138     VubDev *vdev_blk = req->vdev_blk;
139     ssize_t rc;
140 
141     if (!iovcnt) {
142         fprintf(stderr, "Invalid Read IOV count\n");
143         return -1;
144     }
145 
146     req->size = vub_iov_size(iov, iovcnt);
147     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
148     if (rc < 0) {
149         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
150                 vdev_blk->blk_name, req->sector_num, req->size,
151                 strerror(errno));
152         return -1;
153     }
154 
155     return rc;
156 }
157 
158 static ssize_t
159 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
160 {
161     VubDev *vdev_blk = req->vdev_blk;
162     ssize_t rc;
163 
164     if (!iovcnt) {
165         fprintf(stderr, "Invalid Write IOV count\n");
166         return -1;
167     }
168 
169     req->size = vub_iov_size(iov, iovcnt);
170     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
171     if (rc < 0) {
172         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
173                 vdev_blk->blk_name, req->sector_num, req->size,
174                 strerror(errno));
175         return -1;
176     }
177 
178     return rc;
179 }
180 
181 static int
182 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
183                          uint32_t type)
184 {
185     struct virtio_blk_discard_write_zeroes *desc;
186     ssize_t size;
187     void *buf;
188 
189     size = vub_iov_size(iov, iovcnt);
190     if (size != sizeof(*desc)) {
191         fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
192         return -1;
193     }
194     buf = g_new0(char, size);
195     vub_iov_to_buf(iov, iovcnt, buf);
196 
197     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
198     VubDev *vdev_blk = req->vdev_blk;
199     desc = (struct virtio_blk_discard_write_zeroes *)buf;
200     uint64_t range[2] = { le64toh(desc->sector) << 9,
201                           le32toh(desc->num_sectors) << 9 };
202     if (type == VIRTIO_BLK_T_DISCARD) {
203         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
204             g_free(buf);
205             return 0;
206         }
207     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
208         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
209             g_free(buf);
210             return 0;
211         }
212     }
213     #endif
214 
215     g_free(buf);
216     return -1;
217 }
218 
219 static void
220 vub_flush(VubReq *req)
221 {
222     VubDev *vdev_blk = req->vdev_blk;
223 
224     fdatasync(vdev_blk->blk_fd);
225 }
226 
227 static int vub_virtio_process_req(VubDev *vdev_blk,
228                                      VuVirtq *vq)
229 {
230     VugDev *gdev = &vdev_blk->parent;
231     VuDev *vu_dev = &gdev->parent;
232     VuVirtqElement *elem;
233     uint32_t type;
234     unsigned in_num;
235     unsigned out_num;
236     VubReq *req;
237 
238     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
239     if (!elem) {
240         return -1;
241     }
242 
243     /* refer to hw/block/virtio_blk.c */
244     if (elem->out_num < 1 || elem->in_num < 1) {
245         fprintf(stderr, "virtio-blk request missing headers\n");
246         free(elem);
247         return -1;
248     }
249 
250     req = g_new0(VubReq, 1);
251     req->vdev_blk = vdev_blk;
252     req->vq = vq;
253     req->elem = elem;
254 
255     in_num = elem->in_num;
256     out_num = elem->out_num;
257 
258     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
259     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
260         fprintf(stderr, "Invalid outhdr size\n");
261         goto err;
262     }
263     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
264     out_num--;
265 
266     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
267         fprintf(stderr, "Invalid inhdr size\n");
268         goto err;
269     }
270     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
271     in_num--;
272 
273     type = le32toh(req->out->type);
274     switch (type & ~VIRTIO_BLK_T_BARRIER) {
275     case VIRTIO_BLK_T_IN:
276     case VIRTIO_BLK_T_OUT: {
277         ssize_t ret = 0;
278         bool is_write = type & VIRTIO_BLK_T_OUT;
279         req->sector_num = le64toh(req->out->sector);
280         if (is_write) {
281             ret  = vub_writev(req, &elem->out_sg[1], out_num);
282         } else {
283             ret = vub_readv(req, &elem->in_sg[0], in_num);
284         }
285         if (ret >= 0) {
286             req->in->status = VIRTIO_BLK_S_OK;
287         } else {
288             req->in->status = VIRTIO_BLK_S_IOERR;
289         }
290         vub_req_complete(req);
291         break;
292     }
293     case VIRTIO_BLK_T_FLUSH:
294         vub_flush(req);
295         req->in->status = VIRTIO_BLK_S_OK;
296         vub_req_complete(req);
297         break;
298     case VIRTIO_BLK_T_GET_ID: {
299         size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
300                           VIRTIO_BLK_ID_BYTES);
301         snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
302         req->in->status = VIRTIO_BLK_S_OK;
303         req->size = elem->in_sg[0].iov_len;
304         vub_req_complete(req);
305         break;
306     }
307     case VIRTIO_BLK_T_DISCARD:
308     case VIRTIO_BLK_T_WRITE_ZEROES: {
309         int rc;
310         rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
311         if (rc == 0) {
312             req->in->status = VIRTIO_BLK_S_OK;
313         } else {
314             req->in->status = VIRTIO_BLK_S_IOERR;
315         }
316         vub_req_complete(req);
317         break;
318     }
319     default:
320         req->in->status = VIRTIO_BLK_S_UNSUPP;
321         vub_req_complete(req);
322         break;
323     }
324 
325     return 0;
326 
327 err:
328     free(elem);
329     g_free(req);
330     return -1;
331 }
332 
333 static void vub_process_vq(VuDev *vu_dev, int idx)
334 {
335     VugDev *gdev;
336     VubDev *vdev_blk;
337     VuVirtq *vq;
338     int ret;
339 
340     gdev = container_of(vu_dev, VugDev, parent);
341     vdev_blk = container_of(gdev, VubDev, parent);
342     assert(vdev_blk);
343 
344     vq = vu_get_queue(vu_dev, idx);
345     assert(vq);
346 
347     while (1) {
348         ret = vub_virtio_process_req(vdev_blk, vq);
349         if (ret) {
350             break;
351         }
352     }
353 }
354 
355 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
356 {
357     VuVirtq *vq;
358 
359     assert(vu_dev);
360 
361     vq = vu_get_queue(vu_dev, idx);
362     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
363 }
364 
365 static uint64_t
366 vub_get_features(VuDev *dev)
367 {
368     uint64_t features;
369     VugDev *gdev;
370     VubDev *vdev_blk;
371 
372     gdev = container_of(dev, VugDev, parent);
373     vdev_blk = container_of(gdev, VubDev, parent);
374 
375     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
376                1ull << VIRTIO_BLK_F_SEG_MAX |
377                1ull << VIRTIO_BLK_F_TOPOLOGY |
378                1ull << VIRTIO_BLK_F_BLK_SIZE |
379                1ull << VIRTIO_BLK_F_FLUSH |
380                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
381                1ull << VIRTIO_BLK_F_DISCARD |
382                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
383                #endif
384                1ull << VIRTIO_BLK_F_CONFIG_WCE;
385 
386     if (vdev_blk->enable_ro) {
387         features |= 1ull << VIRTIO_BLK_F_RO;
388     }
389 
390     return features;
391 }
392 
393 static uint64_t
394 vub_get_protocol_features(VuDev *dev)
395 {
396     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
397            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
398 }
399 
400 static int
401 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
402 {
403     VugDev *gdev;
404     VubDev *vdev_blk;
405 
406     if (len > sizeof(struct virtio_blk_config)) {
407         return -1;
408     }
409 
410     gdev = container_of(vu_dev, VugDev, parent);
411     vdev_blk = container_of(gdev, VubDev, parent);
412     memcpy(config, &vdev_blk->blkcfg, len);
413 
414     return 0;
415 }
416 
417 static int
418 vub_set_config(VuDev *vu_dev, const uint8_t *data,
419                uint32_t offset, uint32_t size, uint32_t flags)
420 {
421     VugDev *gdev;
422     VubDev *vdev_blk;
423     uint8_t wce;
424     int fd;
425 
426     /* don't support live migration */
427     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
428         return -1;
429     }
430 
431     gdev = container_of(vu_dev, VugDev, parent);
432     vdev_blk = container_of(gdev, VubDev, parent);
433 
434     if (offset != offsetof(struct virtio_blk_config, wce) ||
435         size != 1) {
436         return -1;
437     }
438 
439     wce = *data;
440     if (wce == vdev_blk->blkcfg.wce) {
441         /* Do nothing as same with old configuration */
442         return 0;
443     }
444 
445     vdev_blk->blkcfg.wce = wce;
446     fprintf(stdout, "Write Cache Policy Changed\n");
447     if (vdev_blk->blk_fd >= 0) {
448         close(vdev_blk->blk_fd);
449         vdev_blk->blk_fd = -1;
450     }
451 
452     fd = vub_open(vdev_blk->blk_name, wce);
453     if (fd < 0) {
454         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
455         vdev_blk->blk_fd = -1;
456         return -1;
457     }
458     vdev_blk->blk_fd = fd;
459 
460     return 0;
461 }
462 
463 static const VuDevIface vub_iface = {
464     .get_features = vub_get_features,
465     .queue_set_started = vub_queue_set_started,
466     .get_protocol_features = vub_get_protocol_features,
467     .get_config = vub_get_config,
468     .set_config = vub_set_config,
469 };
470 
/*
 * Create a listening UNIX stream socket bound to unix_fn, removing any
 * stale file at that path first.
 *
 * Returns the listening descriptor, or -1 after reporting the failure on
 * stderr.  A path that does not fit into sockaddr_un.sun_path is rejected
 * up front rather than being silently truncated to a different path.
 */
static int unix_sock_new(char *unix_fn)
{
    struct sockaddr_un un;
    int sock;

    assert(unix_fn);

    if (strlen(unix_fn) >= sizeof(un.sun_path)) {
        fprintf(stderr, "socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    /* Zero the whole address so that passing sizeof(un) to bind() is safe. */
    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
507 
508 static void vub_free(struct VubDev *vdev_blk)
509 {
510     if (!vdev_blk) {
511         return;
512     }
513 
514     g_main_loop_unref(vdev_blk->loop);
515     if (vdev_blk->blk_fd >= 0) {
516         close(vdev_blk->blk_fd);
517     }
518     g_free(vdev_blk);
519 }
520 
/*
 * Report the block size to advertise for fd.  Starts from 512 and, where
 * the BLKSSZGET ioctl is available, lets it overwrite that value; when the
 * ioctl fails the result of this function is the same as without it.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t sector_bytes = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* Success and failure both end in returning sector_bytes. */
    (void)ioctl(fd, BLKSSZGET, &sector_bytes);
#endif

    return sector_bytes;
}
534 
535 static void
536 vub_initialize_config(int fd, struct virtio_blk_config *config)
537 {
538     off64_t capacity;
539 
540     capacity = lseek64(fd, 0, SEEK_END);
541     config->capacity = capacity >> 9;
542     config->blk_size = vub_get_blocksize(fd);
543     config->size_max = 65536;
544     config->seg_max = 128 - 2;
545     config->min_io_size = 1;
546     config->opt_io_size = 1;
547     config->num_queues = 1;
548     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
549     config->max_discard_sectors = 32768;
550     config->max_discard_seg = 1;
551     config->discard_sector_alignment = config->blk_size >> 9;
552     config->max_write_zeroes_sectors = 32768;
553     config->max_write_zeroes_seg = 1;
554     #endif
555 }
556 
557 static VubDev *
558 vub_new(char *blk_file)
559 {
560     VubDev *vdev_blk;
561 
562     vdev_blk = g_new0(VubDev, 1);
563     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
564     vdev_blk->blk_fd = vub_open(blk_file, 0);
565     if (vdev_blk->blk_fd  < 0) {
566         fprintf(stderr, "Error to open block device %s\n", blk_file);
567         vub_free(vdev_blk);
568         return NULL;
569     }
570     vdev_blk->enable_ro = false;
571     vdev_blk->blkcfg.wce = 0;
572     vdev_blk->blk_name = blk_file;
573 
574     /* fill virtio_blk_config with block parameters */
575     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
576 
577     return vdev_blk;
578 }
579 
580 static int opt_fdnum = -1;
581 static char *opt_socket_path;
582 static char *opt_blk_file;
583 static gboolean opt_print_caps;
584 static gboolean opt_read_only;
585 
586 static GOptionEntry entries[] = {
587     { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
588       "Print capabilities", NULL },
589     { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
590       "Use inherited fd socket", "FDNUM" },
591     { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
592       "Use UNIX socket path", "PATH" },
593     {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
594      "block device or file path", "PATH"},
595     { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
596       "Enable read-only", NULL },
597     { NULL, },
598 };
599 
600 int main(int argc, char **argv)
601 {
602     int lsock = -1, csock = -1;
603     VubDev *vdev_blk = NULL;
604     GError *error = NULL;
605     GOptionContext *context;
606 
607     context = g_option_context_new(NULL);
608     g_option_context_add_main_entries(context, entries, NULL);
609     if (!g_option_context_parse(context, &argc, &argv, &error)) {
610         g_printerr("Option parsing failed: %s\n", error->message);
611         exit(EXIT_FAILURE);
612     }
613     if (opt_print_caps) {
614         g_print("{\n");
615         g_print("  \"type\": \"block\",\n");
616         g_print("  \"features\": [\n");
617         g_print("    \"read-only\",\n");
618         g_print("    \"blk-file\"\n");
619         g_print("  ]\n");
620         g_print("}\n");
621         exit(EXIT_SUCCESS);
622     }
623 
624     if (!opt_blk_file) {
625         g_print("%s\n", g_option_context_get_help(context, true, NULL));
626         exit(EXIT_FAILURE);
627     }
628 
629     if (opt_socket_path) {
630         lsock = unix_sock_new(opt_socket_path);
631         if (lsock < 0) {
632             exit(EXIT_FAILURE);
633         }
634     } else if (opt_fdnum < 0) {
635         g_print("%s\n", g_option_context_get_help(context, true, NULL));
636         exit(EXIT_FAILURE);
637     } else {
638         lsock = opt_fdnum;
639     }
640 
641     csock = accept(lsock, NULL, NULL);
642     if (csock < 0) {
643         g_printerr("Accept error %s\n", strerror(errno));
644         exit(EXIT_FAILURE);
645     }
646 
647     vdev_blk = vub_new(opt_blk_file);
648     if (!vdev_blk) {
649         exit(EXIT_FAILURE);
650     }
651     if (opt_read_only) {
652         vdev_blk->enable_ro = true;
653     }
654 
655     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
656                   vub_panic_cb, &vub_iface)) {
657         g_printerr("Failed to initialize libvhost-user-glib\n");
658         exit(EXIT_FAILURE);
659     }
660 
661     g_main_loop_run(vdev_blk->loop);
662     g_main_loop_unref(vdev_blk->loop);
663     g_option_context_free(context);
664     vug_deinit(&vdev_blk->parent);
665     vub_free(vdev_blk);
666     if (csock >= 0) {
667         close(csock);
668     }
669     if (lsock >= 0) {
670         close(lsock);
671     }
672     g_free(opt_socket_path);
673     g_free(opt_blk_file);
674 
675     return 0;
676 }
677