xref: /openbmc/qemu/contrib/vhost-user-blk/vhost-user-blk.c (revision 5885bcef3d760e84d17eb4113e85f2aea0bd0582)
1 /*
2  * vhost-user-blk sample application
3  *
4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
5  *
6  * Author:
7  *  Changpeng Liu <changpeng.liu@intel.com>
8  *
9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10  * implementation by:
11  *  Felipe Franciosi <felipe@nutanix.com>
12  *  Anthony Liguori <aliguori@us.ibm.com>
13  *
14  * This work is licensed under the terms of the GNU GPL, version 2 only.
15  * See the COPYING file in the top-level directory.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/bswap.h"
20 #include "standard-headers/linux/virtio_blk.h"
21 #include "libvhost-user-glib.h"
22 
23 #if defined(__linux__)
24 #include <linux/fs.h>
25 #include <sys/ioctl.h>
26 #endif
27 
/* Queue count passed to vug_init() when the device is set up in main(). */
enum { VHOST_USER_BLK_MAX_QUEUES = 8 };
31 
/*
 * Trailer of a virtio-blk request: a single byte that receives one of the
 * VIRTIO_BLK_S_* status codes once the request has been handled.
 */
struct virtio_blk_inhdr {
    unsigned char status; /* VIRTIO_BLK_S_OK / _IOERR / _UNSUPP */
};
35 
36 /* vhost user block device */
37 typedef struct VubDev {
38     VugDev parent;
39     int blk_fd;
40     struct virtio_blk_config blkcfg;
41     bool enable_ro;
42     char *blk_name;
43     GMainLoop *loop;
44 } VubDev;
45 
46 typedef struct VubReq {
47     VuVirtqElement *elem;
48     int64_t sector_num;
49     size_t size;
50     struct virtio_blk_inhdr *in;
51     struct virtio_blk_outhdr *out;
52     VubDev *vdev_blk;
53     struct VuVirtq *vq;
54 } VubReq;
55 
/*
 * Sum of the iov_len fields of the first @iov_cnt entries of @iov
 * (counterpart of iov_size() in util/iov.c).  Returns 0 for an empty vector.
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int remaining = iov_cnt;

    /* Addition order is irrelevant, so walk the vector from the back. */
    while (remaining > 0) {
        remaining--;
        total += iov[remaining].iov_len;
    }

    return total;
}
69 
/*
 * Flatten the first @iov_cnt segments of @iov into @buf, back to back.
 *
 * @buf must have room for vub_iov_size(iov, iov_cnt) bytes; no bound is
 * checked here.  Returns the number of bytes copied.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte pointer: arithmetic on "void *" is a GNU extension, not ISO C. */
    char *dst = buf;
    size_t len = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        /* Skip empty segments; their base pointer may legitimately be NULL. */
        if (iov[i].iov_len > 0) {
            memcpy(dst + len, iov[i].iov_base, iov[i].iov_len);
            len += iov[i].iov_len;
        }
    }

    return len;
}
83 
/*
 * Panic callback handed to vug_init(): log @buf (when given) as a GLib
 * warning and ask the device's main loop to quit.
 */
static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *glib_dev;
    VubDev *blk;

    assert(vu_dev);

    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    /* Walk outwards: VuDev -> enclosing VugDev -> enclosing VubDev. */
    glib_dev = container_of(vu_dev, VugDev, parent);
    blk = container_of(glib_dev, VubDev, parent);

    g_main_loop_quit(blk->loop);
}
99 
/*
 * Hand a finished request back on its virtqueue, notify the queue, then
 * release both the element and the request.  @req must not be used
 * afterwards.
 */
static void vub_req_complete(VubReq *req)
{
    VuDev *dev = &req->vdev_blk->parent.parent;
    VuVirtq *queue = req->vq;
    VuVirtqElement *element = req->elem;

    /* Reported length is the I/O size plus the one status byte. */
    vu_queue_push(dev, queue, element, req->size + 1);
    vu_queue_notify(dev, queue);

    g_free(element);
    g_free(req);
}
113 
vub_open(const char * file_name,bool wce)114 static int vub_open(const char *file_name, bool wce)
115 {
116     int fd;
117     int flags = O_RDWR;
118 
119     if (!wce) {
120         flags |= O_DIRECT;
121     }
122 
123     fd = open(file_name, flags);
124     if (fd < 0) {
125         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
126                 strerror(errno));
127         return -1;
128     }
129 
130     return fd;
131 }
132 
133 static ssize_t
vub_readv(VubReq * req,struct iovec * iov,uint32_t iovcnt)134 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
135 {
136     VubDev *vdev_blk = req->vdev_blk;
137     ssize_t rc;
138 
139     if (!iovcnt) {
140         fprintf(stderr, "Invalid Read IOV count\n");
141         return -1;
142     }
143 
144     req->size = vub_iov_size(iov, iovcnt);
145     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
146     if (rc < 0) {
147         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
148                 vdev_blk->blk_name, req->sector_num, req->size,
149                 strerror(errno));
150         return -1;
151     }
152 
153     return rc;
154 }
155 
156 static ssize_t
vub_writev(VubReq * req,struct iovec * iov,uint32_t iovcnt)157 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
158 {
159     VubDev *vdev_blk = req->vdev_blk;
160     ssize_t rc;
161 
162     if (!iovcnt) {
163         fprintf(stderr, "Invalid Write IOV count\n");
164         return -1;
165     }
166 
167     req->size = vub_iov_size(iov, iovcnt);
168     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
169     if (rc < 0) {
170         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
171                 vdev_blk->blk_name, req->sector_num, req->size,
172                 strerror(errno));
173         return -1;
174     }
175 
176     return rc;
177 }
178 
179 static int
vub_discard_write_zeroes(VubReq * req,struct iovec * iov,uint32_t iovcnt,uint32_t type)180 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
181                          uint32_t type)
182 {
183     struct virtio_blk_discard_write_zeroes *desc;
184     ssize_t size;
185     void *buf;
186 
187     size = vub_iov_size(iov, iovcnt);
188     if (size != sizeof(*desc)) {
189         fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
190         return -1;
191     }
192     buf = g_new0(char, size);
193     vub_iov_to_buf(iov, iovcnt, buf);
194 
195     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
196     VubDev *vdev_blk = req->vdev_blk;
197     desc = buf;
198     uint64_t range[2] = { le64_to_cpu(desc->sector) << 9,
199                           (uint64_t)le32_to_cpu(desc->num_sectors) << 9 };
200     if (type == VIRTIO_BLK_T_DISCARD) {
201         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
202             g_free(buf);
203             return 0;
204         }
205     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
206         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
207             g_free(buf);
208             return 0;
209         }
210     }
211     #endif
212 
213     g_free(buf);
214     return -1;
215 }
216 
217 static void
vub_flush(VubReq * req)218 vub_flush(VubReq *req)
219 {
220     VubDev *vdev_blk = req->vdev_blk;
221 
222     fdatasync(vdev_blk->blk_fd);
223 }
224 
/*
 * Pop one request from @vq, execute it and complete it.
 *
 * Layout expected from the guest: the first driver-written descriptor
 * holds struct virtio_blk_outhdr, the last device-writable descriptor holds
 * the status byte, and whatever lies in between is the payload.
 *
 * Requests that would modify the backing file (OUT, DISCARD, WRITE_ZEROES)
 * are failed with VIRTIO_BLK_S_IOERR when the device was started read-only.
 *
 * Returns 0 when a request was completed, whatever its status.  Returns -1
 * when the queue yielded no element or the element was malformed; in the
 * malformed case the element is freed without being pushed back.
 */
static int vub_virtio_process_req(VubDev *vdev_blk,
                                     VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    unsigned char status;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        g_free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    /*
     * Drop the legacy barrier bit once, so the dispatch below and the
     * helpers it calls all see the same request type.
     */
    type = le32_to_cpu(req->out->type) & ~VIRTIO_BLK_T_BARRIER;

    switch (type) {
    case VIRTIO_BLK_T_IN:
    case VIRTIO_BLK_T_OUT: {
        ssize_t ret;

        req->sector_num = le64_to_cpu(req->out->sector);
        if (type == VIRTIO_BLK_T_IN) {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        } else if (vdev_blk->enable_ro) {
            /* Advertised read-only: never touch the backing file. */
            ret = -1;
        } else {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        }
        status = ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        vub_flush(req);
        status = VIRTIO_BLK_S_OK;
        break;
    case VIRTIO_BLK_T_GET_ID:
        /*
         * The ID string is written into the first data segment only, so
         * bound it by that segment's length.  With no data segment at all,
         * in_sg[0] is the status descriptor and must not be written here.
         */
        if (in_num > 0) {
            size_t size = MIN(elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES);

            snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
            req->size = elem->in_sg[0].iov_len;
        }
        status = VIRTIO_BLK_S_OK;
        break;
    case VIRTIO_BLK_T_DISCARD:
    case VIRTIO_BLK_T_WRITE_ZEROES:
        if (!vdev_blk->enable_ro &&
            vub_discard_write_zeroes(req, &elem->out_sg[1], out_num,
                                     type) == 0) {
            status = VIRTIO_BLK_S_OK;
        } else {
            status = VIRTIO_BLK_S_IOERR;
        }
        break;
    default:
        status = VIRTIO_BLK_S_UNSUPP;
        break;
    }

    req->in->status = status;
    vub_req_complete(req);

    return 0;

err:
    g_free(elem);
    g_free(req);
    return -1;
}
330 
/*
 * Queue handler installed by vub_queue_set_started(): keep processing
 * requests from queue @idx until vub_virtio_process_req() reports non-zero.
 */
static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *glib_dev = container_of(vu_dev, VugDev, parent);
    VubDev *blk = container_of(glib_dev, VubDev, parent);
    VuVirtq *queue;

    assert(blk);

    queue = vu_get_queue(vu_dev, idx);
    assert(queue);

    while (vub_virtio_process_req(blk, queue) == 0) {
        /* one request handled; try the next */
    }
}
352 
/*
 * queue_set_started callback: install vub_process_vq() as the handler of
 * queue @idx when it starts, and remove the handler when it stops.
 */
static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *queue;

    assert(vu_dev);

    queue = vu_get_queue(vu_dev, idx);
    if (started) {
        vu_set_queue_handler(vu_dev, queue, vub_process_vq);
    } else {
        vu_set_queue_handler(vu_dev, queue, NULL);
    }
}
362 
363 static uint64_t
vub_get_features(VuDev * dev)364 vub_get_features(VuDev *dev)
365 {
366     uint64_t features;
367     VugDev *gdev;
368     VubDev *vdev_blk;
369 
370     gdev = container_of(dev, VugDev, parent);
371     vdev_blk = container_of(gdev, VubDev, parent);
372 
373     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
374                1ull << VIRTIO_BLK_F_SEG_MAX |
375                1ull << VIRTIO_BLK_F_TOPOLOGY |
376                1ull << VIRTIO_BLK_F_BLK_SIZE |
377                1ull << VIRTIO_BLK_F_FLUSH |
378                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
379                1ull << VIRTIO_BLK_F_DISCARD |
380                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
381                #endif
382                1ull << VIRTIO_BLK_F_CONFIG_WCE;
383 
384     if (vdev_blk->enable_ro) {
385         features |= 1ull << VIRTIO_BLK_F_RO;
386     }
387 
388     return features;
389 }
390 
391 static uint64_t
vub_get_protocol_features(VuDev * dev)392 vub_get_protocol_features(VuDev *dev)
393 {
394     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
395            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
396 }
397 
398 static int
vub_get_config(VuDev * vu_dev,uint8_t * config,uint32_t len)399 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
400 {
401     VugDev *gdev;
402     VubDev *vdev_blk;
403 
404     if (len > sizeof(struct virtio_blk_config)) {
405         return -1;
406     }
407 
408     gdev = container_of(vu_dev, VugDev, parent);
409     vdev_blk = container_of(gdev, VubDev, parent);
410     memcpy(config, &vdev_blk->blkcfg, len);
411 
412     return 0;
413 }
414 
415 static int
vub_set_config(VuDev * vu_dev,const uint8_t * data,uint32_t offset,uint32_t size,uint32_t flags)416 vub_set_config(VuDev *vu_dev, const uint8_t *data,
417                uint32_t offset, uint32_t size, uint32_t flags)
418 {
419     VugDev *gdev;
420     VubDev *vdev_blk;
421     uint8_t wce;
422     int fd;
423 
424     /* don't support live migration */
425     if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
426         return -1;
427     }
428 
429     gdev = container_of(vu_dev, VugDev, parent);
430     vdev_blk = container_of(gdev, VubDev, parent);
431 
432     if (offset != offsetof(struct virtio_blk_config, wce) ||
433         size != 1) {
434         return -1;
435     }
436 
437     wce = *data;
438     if (wce == vdev_blk->blkcfg.wce) {
439         /* Do nothing as same with old configuration */
440         return 0;
441     }
442 
443     vdev_blk->blkcfg.wce = wce;
444     fprintf(stdout, "Write Cache Policy Changed\n");
445     if (vdev_blk->blk_fd >= 0) {
446         close(vdev_blk->blk_fd);
447         vdev_blk->blk_fd = -1;
448     }
449 
450     fd = vub_open(vdev_blk->blk_name, wce);
451     if (fd < 0) {
452         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
453         vdev_blk->blk_fd = -1;
454         return -1;
455     }
456     vdev_blk->blk_fd = fd;
457 
458     return 0;
459 }
460 
461 static const VuDevIface vub_iface = {
462     .get_features = vub_get_features,
463     .queue_set_started = vub_queue_set_started,
464     .get_protocol_features = vub_get_protocol_features,
465     .get_config = vub_get_config,
466     .set_config = vub_set_config,
467 };
468 
/*
 * Create a listening AF_UNIX stream socket bound to @unix_fn.
 *
 * Any existing file at @unix_fn is unlinked first.  A path that does not
 * fit in sockaddr_un.sun_path (including its terminating NUL) is rejected
 * rather than truncated, so the socket is never bound to a different name
 * from the one requested.
 *
 * Returns the listening descriptor, or -1 on failure (reason on stderr).
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t path_len;

    assert(unix_fn);

    path_len = strlen(unix_fn);
    if (path_len >= sizeof(un.sun_path)) {
        fprintf(stderr, "Socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    /* Zero the whole address so no uninitialized bytes reach bind(). */
    memset(&un, 0, sizeof(un));
    un.sun_family = AF_UNIX;
    memcpy(un.sun_path, unix_fn, path_len + 1);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
503 
vub_free(struct VubDev * vdev_blk)504 static void vub_free(struct VubDev *vdev_blk)
505 {
506     if (!vdev_blk) {
507         return;
508     }
509 
510     g_main_loop_unref(vdev_blk->loop);
511     if (vdev_blk->blk_fd >= 0) {
512         close(vdev_blk->blk_fd);
513     }
514     g_free(vdev_blk);
515 }
516 
/*
 * Query the block size of @fd with the BLKSSZGET ioctl where available.
 * The value starts out as 512 and is returned as-is when the ioctl is not
 * compiled in; the result is returned whether or not the ioctl succeeds.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t sector_size = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    /* Success and failure both end up returning sector_size. */
    (void)ioctl(fd, BLKSSZGET, &sector_size);
#else
    (void)fd;
#endif

    return sector_size;
}
530 
531 static void
vub_initialize_config(int fd,struct virtio_blk_config * config)532 vub_initialize_config(int fd, struct virtio_blk_config *config)
533 {
534     off_t capacity;
535 
536     capacity = lseek(fd, 0, SEEK_END);
537     config->capacity = capacity >> 9;
538     config->blk_size = vub_get_blocksize(fd);
539     config->size_max = 65536;
540     config->seg_max = 128 - 2;
541     config->min_io_size = 1;
542     config->opt_io_size = 1;
543     config->num_queues = 1;
544     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
545     config->max_discard_sectors = 32768;
546     config->max_discard_seg = 1;
547     config->discard_sector_alignment = config->blk_size >> 9;
548     config->max_write_zeroes_sectors = 32768;
549     config->max_write_zeroes_seg = 1;
550     #endif
551 }
552 
553 static VubDev *
vub_new(char * blk_file)554 vub_new(char *blk_file)
555 {
556     VubDev *vdev_blk;
557 
558     vdev_blk = g_new0(VubDev, 1);
559     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
560     vdev_blk->blk_fd = vub_open(blk_file, 0);
561     if (vdev_blk->blk_fd  < 0) {
562         fprintf(stderr, "Error to open block device %s\n", blk_file);
563         vub_free(vdev_blk);
564         return NULL;
565     }
566     vdev_blk->enable_ro = false;
567     vdev_blk->blkcfg.wce = 0;
568     vdev_blk->blk_name = blk_file;
569 
570     /* fill virtio_blk_config with block parameters */
571     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
572 
573     return vdev_blk;
574 }
575 
576 static int opt_fdnum = -1;
577 static char *opt_socket_path;
578 static char *opt_blk_file;
579 static gboolean opt_print_caps;
580 static gboolean opt_read_only;
581 
582 static GOptionEntry entries[] = {
583     { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
584       "Print capabilities", NULL },
585     { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
586       "Use inherited fd socket", "FDNUM" },
587     { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
588       "Use UNIX socket path", "PATH" },
589     {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
590      "block device or file path", "PATH"},
591     { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
592       "Enable read-only", NULL },
593     { NULL, },
594 };
595 
main(int argc,char ** argv)596 int main(int argc, char **argv)
597 {
598     int lsock = -1, csock = -1;
599     VubDev *vdev_blk = NULL;
600     GError *error = NULL;
601     GOptionContext *context;
602 
603     context = g_option_context_new(NULL);
604     g_option_context_add_main_entries(context, entries, NULL);
605     if (!g_option_context_parse(context, &argc, &argv, &error)) {
606         g_printerr("Option parsing failed: %s\n", error->message);
607         exit(EXIT_FAILURE);
608     }
609     if (opt_print_caps) {
610         g_print("{\n");
611         g_print("  \"type\": \"block\",\n");
612         g_print("  \"features\": [\n");
613         g_print("    \"read-only\",\n");
614         g_print("    \"blk-file\"\n");
615         g_print("  ]\n");
616         g_print("}\n");
617         exit(EXIT_SUCCESS);
618     }
619 
620     if (!opt_blk_file) {
621         g_print("%s\n", g_option_context_get_help(context, true, NULL));
622         exit(EXIT_FAILURE);
623     }
624 
625     if (opt_socket_path) {
626         lsock = unix_sock_new(opt_socket_path);
627         if (lsock < 0) {
628             exit(EXIT_FAILURE);
629         }
630     } else if (opt_fdnum < 0) {
631         g_print("%s\n", g_option_context_get_help(context, true, NULL));
632         exit(EXIT_FAILURE);
633     } else {
634         lsock = opt_fdnum;
635     }
636 
637     csock = accept(lsock, NULL, NULL);
638     if (csock < 0) {
639         g_printerr("Accept error %s\n", strerror(errno));
640         exit(EXIT_FAILURE);
641     }
642 
643     vdev_blk = vub_new(opt_blk_file);
644     if (!vdev_blk) {
645         exit(EXIT_FAILURE);
646     }
647     if (opt_read_only) {
648         vdev_blk->enable_ro = true;
649     }
650 
651     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
652                   vub_panic_cb, &vub_iface)) {
653         g_printerr("Failed to initialize libvhost-user-glib\n");
654         exit(EXIT_FAILURE);
655     }
656 
657     g_main_loop_run(vdev_blk->loop);
658     g_main_loop_unref(vdev_blk->loop);
659     g_option_context_free(context);
660     vug_deinit(&vdev_blk->parent);
661     vub_free(vdev_blk);
662     if (csock >= 0) {
663         close(csock);
664     }
665     if (lsock >= 0) {
666         close(lsock);
667     }
668     g_free(opt_socket_path);
669     g_free(opt_blk_file);
670 
671     return 0;
672 }
673