/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"

#if defined(__linux__)
#include <linux/fs.h>
#include <sys/ioctl.h>
#endif

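/* Virtio-blk request footer: a single status byte written back to the guest */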
struct virtio_blk_inhdr {
    unsigned char status;
};

/* vhost user block device */
typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
} VubDev;

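/* Per-request state: the popped descriptor chain plus the request/status headers */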
typedef struct VubReq {
    VuVirtqElement *elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    VubDev *vdev_blk;
    struct VuVirtq *vq;
} VubReq;

/* Sum of the iovec lengths; refer to util/iov.c */
static size_t vub_iov_size(const struct iovec *iov,
                              const unsigned int iov_cnt)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}

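/* Panic callback registered with libvhost-user: log the message and stop the main loop */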
static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    g_main_loop_quit(vdev_blk->loop);
}

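/* Return a completed request to the guest: push the element onto the used ring and kick */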
static void vub_req_complete(VubReq *req)
{
    VugDev *gdev = &req->vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem,
                  req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    if (req->elem) {
        free(req->elem);
    }

    g_free(req);
}

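/*
 * Open the backing file; when the write cache is disabled, use O_DIRECT so
 * writes bypass the host page cache.
 */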
static int vub_open(const char *file_name, bool wce)
{
    int fd;
    int flags = O_RDWR;

    if (!wce) {
        flags |= O_DIRECT;
    }

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}

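/* Read the requested sectors from the backing file into the guest's buffers */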
static ssize_t
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Read IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRId64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

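/* Write the guest's buffers to the backing file */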
static ssize_t
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Write IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRId64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

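/* VIRTIO_BLK_T_FLUSH: flush the backing file's data out to stable storage */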
static void
vub_flush(VubReq *req)
{
    VubDev *vdev_blk = req->vdev_blk;

    fdatasync(vdev_blk->blk_fd);
}

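/*
 * Pop one request from the virtqueue and service it.  The request header
 * (virtio_blk_outhdr) is expected in the first out-descriptor and the status
 * byte (virtio_blk_inhdr) in the last in-descriptor; the remaining descriptors
 * carry the data.  Returns 0 on success, -1 when the queue is empty or the
 * request is malformed.
 */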
static int vub_virtio_process_req(VubDev *vdev_blk,
                                     VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio-blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /*
     * VIRTIO_F_ANY_LAYOUT is not offered and only virtio 1.0 is supported,
     * so the request header always fills the first out-descriptor.
     */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
        case VIRTIO_BLK_T_IN: {
            ssize_t ret = 0;
            bool is_write = type & VIRTIO_BLK_T_OUT;
            req->sector_num = le64toh(req->out->sector);
            if (is_write) {
                ret = vub_writev(req, &elem->out_sg[1], out_num);
            } else {
                ret = vub_readv(req, &elem->in_sg[0], in_num);
            }
            if (ret >= 0) {
                req->in->status = VIRTIO_BLK_S_OK;
            } else {
                req->in->status = VIRTIO_BLK_S_IOERR;
            }
            vub_req_complete(req);
            break;
        }
        case VIRTIO_BLK_T_FLUSH: {
            vub_flush(req);
            req->in->status = VIRTIO_BLK_S_OK;
            vub_req_complete(req);
            break;
        }
        case VIRTIO_BLK_T_GET_ID: {
            size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                              VIRTIO_BLK_ID_BYTES);
            snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
            req->in->status = VIRTIO_BLK_S_OK;
            req->size = elem->in_sg[0].iov_len;
            vub_req_complete(req);
            break;
        }
        default: {
            req->in->status = VIRTIO_BLK_S_UNSUPP;
            vub_req_complete(req);
            break;
        }
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}

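/* Queue kick handler: drain every available request from the virtqueue */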
static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    VuVirtq *vq;
    int ret;

    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
        fprintf(stderr, "VQ Index out of range: %d\n", idx);
        vub_panic_cb(vu_dev, NULL);
        return;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    while (1) {
        ret = vub_virtio_process_req(vdev_blk, vq);
        if (ret) {
            break;
        }
    }
}

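/* Install or remove the kick handler as the vring is started or stopped */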
static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
}

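/*
 * Virtio feature bits offered to the master; VIRTIO_BLK_F_RO is only added
 * when the device was started read-only (-r).
 */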
static uint64_t
vub_get_features(VuDev *dev)
{
    uint64_t features;
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (vdev_blk->enable_ro) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t
vub_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

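/* Copy the virtio-blk config space out to the master */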
static int
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    if (len > sizeof(struct virtio_blk_config)) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);

    return 0;
}

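/*
 * The guest toggled the write-cache enable bit (VIRTIO_BLK_F_CONFIG_WCE):
 * reopen the backing file with the matching O_DIRECT setting.
 */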
static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    /* only the write-cache enable byte may be changed */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* nothing to do, the setting is unchanged */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Failed to reopen block device %s\n",
                vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}

static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};

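/*
 * Create a listening UNIX domain socket at the given path; any stale socket
 * file from a previous run is removed first.
 */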
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}

static void vub_free(struct VubDev *vdev_blk)
{
    if (!vdev_blk) {
        return;
    }

    g_main_loop_unref(vdev_blk->loop);
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
    }
    g_free(vdev_blk);
}

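/*
 * Logical block size of the backing device; falls back to 512 bytes for
 * regular files or when BLKSSZGET is not available.
 */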
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}

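/* Fill the virtio-blk config space from the backing file; capacity is in 512-byte sectors */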
static void
vub_initialize_config(int fd, struct virtio_blk_config *config)
{
    off64_t capacity;

    capacity = lseek64(fd, 0, SEEK_END);
    config->capacity = capacity >> 9;
    config->blk_size = vub_get_blocksize(fd);
    config->size_max = 65536;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
}

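/*
 * Allocate the device state, open the backing file (write cache disabled by
 * default, i.e. O_DIRECT) and probe its geometry.
 */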
static VubDev *
vub_new(char *blk_file)
{
    VubDev *vdev_blk;

    vdev_blk = g_new0(VubDev, 1);
    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    vdev_blk->blk_fd = vub_open(blk_file, false);
    if (vdev_blk->blk_fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n", blk_file);
        vub_free(vdev_blk);
        return NULL;
    }
    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

    /* fill virtio_blk_config with block parameters */
    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);

    return vdev_blk;
}

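/*
 * Example invocation (paths are illustrative; the QEMU-side options assume a
 * QEMU build that provides the vhost-user-blk-pci device):
 *
 *   ./vhost-user-blk -b /path/to/disk.img -s /tmp/vhost-user-blk.sock
 *
 *   qemu-system-x86_64 ... \
 *       -object memory-backend-file,id=mem,size=1G,mem-path=/dev/shm,share=on \
 *       -numa node,memdev=mem \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0
 */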
int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s -b <block device or file>"
                   " -s <UNIX domain socket> [ -r (read-only) ] [ -h ]\n",
                   argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s -b <block device or file>"
               " -s <UNIX domain socket> [ -r (read-only) ] [ -h ]\n",
               argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    /* wait for a single vhost-user master (e.g. QEMU) to connect */
    csock = accept(lsock, NULL, NULL);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}