xref: /openbmc/qemu/contrib/vhost-user-blk/vhost-user-blk.c (revision 5885bcef3d760e84d17eb4113e85f2aea0bd0582)
/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */
17406d2aa2SChangpeng Liu 
18406d2aa2SChangpeng Liu #include "qemu/osdep.h"
195ab04420SStefano Garzarella #include "qemu/bswap.h"
20406d2aa2SChangpeng Liu #include "standard-headers/linux/virtio_blk.h"
210df750e9SMarc-André Lureau #include "libvhost-user-glib.h"
22406d2aa2SChangpeng Liu 
23a56de056SPeter Xu #if defined(__linux__)
24a56de056SPeter Xu #include <linux/fs.h>
25a56de056SPeter Xu #include <sys/ioctl.h>
26a56de056SPeter Xu #endif
27406d2aa2SChangpeng Liu 
/* Maximum number of virtqueues passed to vug_init() in main(). */
enum {
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
316f5fd837SStefan Hajnoczi 
/*
 * Trailer the device writes back for every request: a single status
 * byte (one of the VIRTIO_BLK_S_* values).
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
35406d2aa2SChangpeng Liu 
36406d2aa2SChangpeng Liu /* vhost user block device */
37406d2aa2SChangpeng Liu typedef struct VubDev {
38406d2aa2SChangpeng Liu     VugDev parent;
39406d2aa2SChangpeng Liu     int blk_fd;
40406d2aa2SChangpeng Liu     struct virtio_blk_config blkcfg;
4125b1d45aSChangpeng Liu     bool enable_ro;
42406d2aa2SChangpeng Liu     char *blk_name;
43406d2aa2SChangpeng Liu     GMainLoop *loop;
44406d2aa2SChangpeng Liu } VubDev;
45406d2aa2SChangpeng Liu 
46406d2aa2SChangpeng Liu typedef struct VubReq {
47406d2aa2SChangpeng Liu     VuVirtqElement *elem;
48406d2aa2SChangpeng Liu     int64_t sector_num;
49406d2aa2SChangpeng Liu     size_t size;
50406d2aa2SChangpeng Liu     struct virtio_blk_inhdr *in;
51406d2aa2SChangpeng Liu     struct virtio_blk_outhdr *out;
52406d2aa2SChangpeng Liu     VubDev *vdev_blk;
53406d2aa2SChangpeng Liu     struct VuVirtq *vq;
54406d2aa2SChangpeng Liu } VubReq;
55406d2aa2SChangpeng Liu 
/*
 * Return the total number of bytes described by the first @iov_cnt
 * entries of @iov (refer util/iov.c).
 *
 * With @iov_cnt == 0 the result is 0 and @iov is never dereferenced.
 */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t total = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        total += iov[i].iov_len;
    }

    return total;
}
69406d2aa2SChangpeng Liu 
/*
 * Gather the first @iov_cnt entries of @iov into the contiguous buffer
 * @buf and return the number of bytes copied.
 *
 * The caller must make sure @buf can hold vub_iov_size(iov, iov_cnt)
 * bytes; no bound is checked here.
 */
static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    /* Byte cursor: arithmetic on void * is a GNU extension, not ISO C. */
    char *dst = buf;
    size_t copied = 0;
    unsigned int i;

    for (i = 0; i < iov_cnt; i++) {
        memcpy(dst + copied, iov[i].iov_base, iov[i].iov_len);
        copied += iov[i].iov_len;
    }

    return copied;
}
83caa1ee43SChangpeng Liu 
/*
 * Panic callback handed to vug_init(): log @buf when one is supplied,
 * then ask the device's main loop to quit.
 */
static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    /* VuDev is embedded in VugDev, which is embedded in VubDev. */
    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    g_main_loop_quit(vdev_blk->loop);
}
99406d2aa2SChangpeng Liu 
/*
 * Return a finished request to the guest and release it.
 *
 * Pushes the element with a used length of req->size plus one status
 * byte, notifies the queue, then frees both the element and @req.
 * @req must not be used after this call.
 */
static void vub_req_complete(VubReq *req)
{
    VuDev *vu_dev = &req->vdev_blk->parent.parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    g_free(req->elem);
    g_free(req);
}
113406d2aa2SChangpeng Liu 
/*
 * Open the backing file @file_name for reading and writing.
 *
 * @wce: write cache enable.  When false the file is opened with
 *       O_DIRECT, on platforms that define that flag.
 *
 * Returns the new file descriptor, or -1 after printing the reason to
 * stderr.
 */
static int vub_open(const char *file_name, bool wce)
{
    int flags = O_RDWR;
    int fd;

    /* O_DIRECT is not available everywhere; skip it where it is missing. */
#ifdef O_DIRECT
    if (!wce) {
        flags |= O_DIRECT;
    }
#endif

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}
132406d2aa2SChangpeng Liu 
133406d2aa2SChangpeng Liu static ssize_t
vub_readv(VubReq * req,struct iovec * iov,uint32_t iovcnt)134406d2aa2SChangpeng Liu vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
135406d2aa2SChangpeng Liu {
136406d2aa2SChangpeng Liu     VubDev *vdev_blk = req->vdev_blk;
137406d2aa2SChangpeng Liu     ssize_t rc;
138406d2aa2SChangpeng Liu 
139406d2aa2SChangpeng Liu     if (!iovcnt) {
140406d2aa2SChangpeng Liu         fprintf(stderr, "Invalid Read IOV count\n");
141406d2aa2SChangpeng Liu         return -1;
142406d2aa2SChangpeng Liu     }
143406d2aa2SChangpeng Liu 
144406d2aa2SChangpeng Liu     req->size = vub_iov_size(iov, iovcnt);
145406d2aa2SChangpeng Liu     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
146406d2aa2SChangpeng Liu     if (rc < 0) {
14726ed501bSAlex Bennée         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
148406d2aa2SChangpeng Liu                 vdev_blk->blk_name, req->sector_num, req->size,
149406d2aa2SChangpeng Liu                 strerror(errno));
150406d2aa2SChangpeng Liu         return -1;
151406d2aa2SChangpeng Liu     }
152406d2aa2SChangpeng Liu 
153406d2aa2SChangpeng Liu     return rc;
154406d2aa2SChangpeng Liu }
155406d2aa2SChangpeng Liu 
156406d2aa2SChangpeng Liu static ssize_t
vub_writev(VubReq * req,struct iovec * iov,uint32_t iovcnt)157406d2aa2SChangpeng Liu vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
158406d2aa2SChangpeng Liu {
159406d2aa2SChangpeng Liu     VubDev *vdev_blk = req->vdev_blk;
160406d2aa2SChangpeng Liu     ssize_t rc;
161406d2aa2SChangpeng Liu 
162406d2aa2SChangpeng Liu     if (!iovcnt) {
163406d2aa2SChangpeng Liu         fprintf(stderr, "Invalid Write IOV count\n");
164406d2aa2SChangpeng Liu         return -1;
165406d2aa2SChangpeng Liu     }
166406d2aa2SChangpeng Liu 
167406d2aa2SChangpeng Liu     req->size = vub_iov_size(iov, iovcnt);
168406d2aa2SChangpeng Liu     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
169406d2aa2SChangpeng Liu     if (rc < 0) {
17026ed501bSAlex Bennée         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
171406d2aa2SChangpeng Liu                 vdev_blk->blk_name, req->sector_num, req->size,
172406d2aa2SChangpeng Liu                 strerror(errno));
173406d2aa2SChangpeng Liu         return -1;
174406d2aa2SChangpeng Liu     }
175406d2aa2SChangpeng Liu 
176406d2aa2SChangpeng Liu     return rc;
177406d2aa2SChangpeng Liu }
178406d2aa2SChangpeng Liu 
179caa1ee43SChangpeng Liu static int
vub_discard_write_zeroes(VubReq * req,struct iovec * iov,uint32_t iovcnt,uint32_t type)180caa1ee43SChangpeng Liu vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
181caa1ee43SChangpeng Liu                          uint32_t type)
182caa1ee43SChangpeng Liu {
183caa1ee43SChangpeng Liu     struct virtio_blk_discard_write_zeroes *desc;
184caa1ee43SChangpeng Liu     ssize_t size;
185caa1ee43SChangpeng Liu     void *buf;
186caa1ee43SChangpeng Liu 
187caa1ee43SChangpeng Liu     size = vub_iov_size(iov, iovcnt);
188caa1ee43SChangpeng Liu     if (size != sizeof(*desc)) {
18926ed501bSAlex Bennée         fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
190caa1ee43SChangpeng Liu         return -1;
191caa1ee43SChangpeng Liu     }
192caa1ee43SChangpeng Liu     buf = g_new0(char, size);
193caa1ee43SChangpeng Liu     vub_iov_to_buf(iov, iovcnt, buf);
194caa1ee43SChangpeng Liu 
195caa1ee43SChangpeng Liu     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
196caa1ee43SChangpeng Liu     VubDev *vdev_blk = req->vdev_blk;
1973d558330SMarkus Armbruster     desc = buf;
1985ab04420SStefano Garzarella     uint64_t range[2] = { le64_to_cpu(desc->sector) << 9,
199*99d7c1b9SStefano Garzarella                           (uint64_t)le32_to_cpu(desc->num_sectors) << 9 };
200caa1ee43SChangpeng Liu     if (type == VIRTIO_BLK_T_DISCARD) {
201caa1ee43SChangpeng Liu         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
202caa1ee43SChangpeng Liu             g_free(buf);
203caa1ee43SChangpeng Liu             return 0;
204caa1ee43SChangpeng Liu         }
205caa1ee43SChangpeng Liu     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
206caa1ee43SChangpeng Liu         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
207caa1ee43SChangpeng Liu             g_free(buf);
208caa1ee43SChangpeng Liu             return 0;
209caa1ee43SChangpeng Liu         }
210caa1ee43SChangpeng Liu     }
211caa1ee43SChangpeng Liu     #endif
212caa1ee43SChangpeng Liu 
213caa1ee43SChangpeng Liu     g_free(buf);
214caa1ee43SChangpeng Liu     return -1;
215caa1ee43SChangpeng Liu }
216caa1ee43SChangpeng Liu 
217406d2aa2SChangpeng Liu static void
vub_flush(VubReq * req)218406d2aa2SChangpeng Liu vub_flush(VubReq *req)
219406d2aa2SChangpeng Liu {
220406d2aa2SChangpeng Liu     VubDev *vdev_blk = req->vdev_blk;
221406d2aa2SChangpeng Liu 
222406d2aa2SChangpeng Liu     fdatasync(vdev_blk->blk_fd);
223406d2aa2SChangpeng Liu }
224406d2aa2SChangpeng Liu 
/*
 * Pop one request from @vq, execute it and complete it.
 *
 * Expected layout: the first driver-written buffer starts with a
 * struct virtio_blk_outhdr and the last device-writable buffer holds
 * the struct virtio_blk_inhdr status byte.
 *
 * Returns 0 when a request was completed (whatever its virtio status),
 * -1 when no element could be popped or the headers are malformed.
 * In the malformed case the element is freed without being pushed
 * back to the guest.
 */
static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    /* VubReq is allocated separately below; no extra room is needed here. */
    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        g_free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    /*
     * Strip the barrier bit once so that the dispatch below and the
     * helpers it calls all see the same request type.
     */
    type = le32_to_cpu(req->out->type) & ~VIRTIO_BLK_T_BARRIER;
    switch (type) {
    case VIRTIO_BLK_T_IN:
    case VIRTIO_BLK_T_OUT: {
        ssize_t ret;
        bool is_write = type & VIRTIO_BLK_T_OUT;

        req->sector_num = le64_to_cpu(req->out->sector);
        if (is_write) {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
            /*
             * A write stores nothing but the status byte in the
             * device-writable buffers, so no data length is reported.
             */
            req->size = 0;
        } else {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        req->in->status = ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    case VIRTIO_BLK_T_GET_ID: {
        /*
         * The ID string goes into the first data buffer only, so it is
         * bounded by that buffer's length.  With no data buffer at all
         * (in_sg[0] is then the status byte) nothing is written.
         */
        size_t avail = in_num ? elem->in_sg[0].iov_len : 0;
        size_t size = MIN(avail, VIRTIO_BLK_ID_BYTES);

        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = avail;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_DISCARD:
    case VIRTIO_BLK_T_WRITE_ZEROES: {
        int rc;

        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
        req->in->status = rc == 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR;
        vub_req_complete(req);
        break;
    }
    default:
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }

    return 0;

err:
    g_free(elem);
    g_free(req);
    return -1;
}
330406d2aa2SChangpeng Liu 
/*
 * Queue handler installed by vub_queue_set_started(): process requests
 * from queue @idx until vub_virtio_process_req() reports a non-zero
 * result.
 */
static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev = container_of(vu_dev, VugDev, parent);
    VubDev *vdev_blk = container_of(gdev, VubDev, parent);
    VuVirtq *vq;

    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    while (vub_virtio_process_req(vdev_blk, vq) == 0) {
        /* one request handled; try the next one */
    }
}
352406d2aa2SChangpeng Liu 
/*
 * queue_set_started callback: install vub_process_vq() as the handler of
 * queue @idx when @started is true, remove the handler otherwise.
 */
static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    if (started) {
        vu_set_queue_handler(vu_dev, vq, vub_process_vq);
    } else {
        vu_set_queue_handler(vu_dev, vq, NULL);
    }
}
362406d2aa2SChangpeng Liu 
363406d2aa2SChangpeng Liu static uint64_t
vub_get_features(VuDev * dev)364406d2aa2SChangpeng Liu vub_get_features(VuDev *dev)
365406d2aa2SChangpeng Liu {
36625b1d45aSChangpeng Liu     uint64_t features;
36725b1d45aSChangpeng Liu     VugDev *gdev;
36825b1d45aSChangpeng Liu     VubDev *vdev_blk;
36925b1d45aSChangpeng Liu 
37025b1d45aSChangpeng Liu     gdev = container_of(dev, VugDev, parent);
37125b1d45aSChangpeng Liu     vdev_blk = container_of(gdev, VubDev, parent);
37225b1d45aSChangpeng Liu 
37325b1d45aSChangpeng Liu     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
374406d2aa2SChangpeng Liu                1ull << VIRTIO_BLK_F_SEG_MAX |
375406d2aa2SChangpeng Liu                1ull << VIRTIO_BLK_F_TOPOLOGY |
376406d2aa2SChangpeng Liu                1ull << VIRTIO_BLK_F_BLK_SIZE |
377406d2aa2SChangpeng Liu                1ull << VIRTIO_BLK_F_FLUSH |
378caa1ee43SChangpeng Liu                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
379caa1ee43SChangpeng Liu                1ull << VIRTIO_BLK_F_DISCARD |
380caa1ee43SChangpeng Liu                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
381caa1ee43SChangpeng Liu                #endif
382a9a5c473SStefan Hajnoczi                1ull << VIRTIO_BLK_F_CONFIG_WCE;
38325b1d45aSChangpeng Liu 
38425b1d45aSChangpeng Liu     if (vdev_blk->enable_ro) {
38525b1d45aSChangpeng Liu         features |= 1ull << VIRTIO_BLK_F_RO;
38625b1d45aSChangpeng Liu     }
38725b1d45aSChangpeng Liu 
38825b1d45aSChangpeng Liu     return features;
389406d2aa2SChangpeng Liu }
390406d2aa2SChangpeng Liu 
3917d405b2fSChangpeng Liu static uint64_t
vub_get_protocol_features(VuDev * dev)3927d405b2fSChangpeng Liu vub_get_protocol_features(VuDev *dev)
3937d405b2fSChangpeng Liu {
3946b8f9c6eSXie Yongji     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
3956b8f9c6eSXie Yongji            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
3967d405b2fSChangpeng Liu }
3977d405b2fSChangpeng Liu 
398406d2aa2SChangpeng Liu static int
vub_get_config(VuDev * vu_dev,uint8_t * config,uint32_t len)399406d2aa2SChangpeng Liu vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
400406d2aa2SChangpeng Liu {
401406d2aa2SChangpeng Liu     VugDev *gdev;
402406d2aa2SChangpeng Liu     VubDev *vdev_blk;
403406d2aa2SChangpeng Liu 
404acb1f3c2SStefan Hajnoczi     if (len > sizeof(struct virtio_blk_config)) {
405acb1f3c2SStefan Hajnoczi         return -1;
406acb1f3c2SStefan Hajnoczi     }
4079f6df01dSStefan Hajnoczi 
408406d2aa2SChangpeng Liu     gdev = container_of(vu_dev, VugDev, parent);
409406d2aa2SChangpeng Liu     vdev_blk = container_of(gdev, VubDev, parent);
410406d2aa2SChangpeng Liu     memcpy(config, &vdev_blk->blkcfg, len);
411406d2aa2SChangpeng Liu 
412406d2aa2SChangpeng Liu     return 0;
413406d2aa2SChangpeng Liu }
414406d2aa2SChangpeng Liu 
415406d2aa2SChangpeng Liu static int
vub_set_config(VuDev * vu_dev,const uint8_t * data,uint32_t offset,uint32_t size,uint32_t flags)416406d2aa2SChangpeng Liu vub_set_config(VuDev *vu_dev, const uint8_t *data,
417406d2aa2SChangpeng Liu                uint32_t offset, uint32_t size, uint32_t flags)
418406d2aa2SChangpeng Liu {
419406d2aa2SChangpeng Liu     VugDev *gdev;
420406d2aa2SChangpeng Liu     VubDev *vdev_blk;
421406d2aa2SChangpeng Liu     uint8_t wce;
422406d2aa2SChangpeng Liu     int fd;
423406d2aa2SChangpeng Liu 
424406d2aa2SChangpeng Liu     /* don't support live migration */
425f8ed3648SManos Pitsidianakis     if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
426406d2aa2SChangpeng Liu         return -1;
427406d2aa2SChangpeng Liu     }
428406d2aa2SChangpeng Liu 
429406d2aa2SChangpeng Liu     gdev = container_of(vu_dev, VugDev, parent);
430406d2aa2SChangpeng Liu     vdev_blk = container_of(gdev, VubDev, parent);
431406d2aa2SChangpeng Liu 
432406d2aa2SChangpeng Liu     if (offset != offsetof(struct virtio_blk_config, wce) ||
433406d2aa2SChangpeng Liu         size != 1) {
434406d2aa2SChangpeng Liu         return -1;
435406d2aa2SChangpeng Liu     }
436406d2aa2SChangpeng Liu 
437406d2aa2SChangpeng Liu     wce = *data;
438406d2aa2SChangpeng Liu     if (wce == vdev_blk->blkcfg.wce) {
439406d2aa2SChangpeng Liu         /* Do nothing as same with old configuration */
440406d2aa2SChangpeng Liu         return 0;
441406d2aa2SChangpeng Liu     }
442406d2aa2SChangpeng Liu 
443406d2aa2SChangpeng Liu     vdev_blk->blkcfg.wce = wce;
444406d2aa2SChangpeng Liu     fprintf(stdout, "Write Cache Policy Changed\n");
445406d2aa2SChangpeng Liu     if (vdev_blk->blk_fd >= 0) {
446406d2aa2SChangpeng Liu         close(vdev_blk->blk_fd);
447406d2aa2SChangpeng Liu         vdev_blk->blk_fd = -1;
448406d2aa2SChangpeng Liu     }
449406d2aa2SChangpeng Liu 
450406d2aa2SChangpeng Liu     fd = vub_open(vdev_blk->blk_name, wce);
451406d2aa2SChangpeng Liu     if (fd < 0) {
452406d2aa2SChangpeng Liu         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
453406d2aa2SChangpeng Liu         vdev_blk->blk_fd = -1;
454406d2aa2SChangpeng Liu         return -1;
455406d2aa2SChangpeng Liu     }
456406d2aa2SChangpeng Liu     vdev_blk->blk_fd = fd;
457406d2aa2SChangpeng Liu 
458406d2aa2SChangpeng Liu     return 0;
459406d2aa2SChangpeng Liu }
460406d2aa2SChangpeng Liu 
461406d2aa2SChangpeng Liu static const VuDevIface vub_iface = {
462406d2aa2SChangpeng Liu     .get_features = vub_get_features,
463406d2aa2SChangpeng Liu     .queue_set_started = vub_queue_set_started,
4647d405b2fSChangpeng Liu     .get_protocol_features = vub_get_protocol_features,
465406d2aa2SChangpeng Liu     .get_config = vub_get_config,
466406d2aa2SChangpeng Liu     .set_config = vub_set_config,
467406d2aa2SChangpeng Liu };
468406d2aa2SChangpeng Liu 
/*
 * Create a listening UNIX stream socket bound to @unix_fn.
 *
 * Any existing file at @unix_fn is unlinked first.  A path that does not
 * fit into sockaddr_un.sun_path is rejected instead of being truncated,
 * so the path that is unlinked is always the path that is bound.
 *
 * Returns the listening socket, or -1 on failure.
 */
static int unix_sock_new(char *unix_fn)
{
    struct sockaddr_un un = { .sun_family = AF_UNIX };
    int sock;

    assert(unix_fn);

    if (strlen(unix_fn) >= sizeof(un.sun_path)) {
        fprintf(stderr, "UNIX socket path too long: %s\n", unix_fn);
        return -1;
    }

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
503406d2aa2SChangpeng Liu 
vub_free(struct VubDev * vdev_blk)504406d2aa2SChangpeng Liu static void vub_free(struct VubDev *vdev_blk)
505406d2aa2SChangpeng Liu {
506406d2aa2SChangpeng Liu     if (!vdev_blk) {
507406d2aa2SChangpeng Liu         return;
508406d2aa2SChangpeng Liu     }
509406d2aa2SChangpeng Liu 
510406d2aa2SChangpeng Liu     g_main_loop_unref(vdev_blk->loop);
511406d2aa2SChangpeng Liu     if (vdev_blk->blk_fd >= 0) {
512406d2aa2SChangpeng Liu         close(vdev_blk->blk_fd);
513406d2aa2SChangpeng Liu     }
514406d2aa2SChangpeng Liu     g_free(vdev_blk);
515406d2aa2SChangpeng Liu }
516406d2aa2SChangpeng Liu 
/*
 * Return the logical block size of @fd.
 *
 * On Linux builds that have BLKSSZGET the ioctl is tried first; when it
 * is unavailable, fails, or reports a non-positive size, 512 is returned.
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    {
        /* BLKSSZGET stores its result through an int pointer */
        int sector_size = 0;

        if (ioctl(fd, BLKSSZGET, &sector_size) == 0 && sector_size > 0) {
            blocksize = sector_size;
        }
    }
#else
    (void)fd;
#endif

    return blocksize;
}
530406d2aa2SChangpeng Liu 
531406d2aa2SChangpeng Liu static void
vub_initialize_config(int fd,struct virtio_blk_config * config)532406d2aa2SChangpeng Liu vub_initialize_config(int fd, struct virtio_blk_config *config)
533406d2aa2SChangpeng Liu {
53487d67ffeSKhem Raj     off_t capacity;
535406d2aa2SChangpeng Liu 
53687d67ffeSKhem Raj     capacity = lseek(fd, 0, SEEK_END);
537406d2aa2SChangpeng Liu     config->capacity = capacity >> 9;
538406d2aa2SChangpeng Liu     config->blk_size = vub_get_blocksize(fd);
539406d2aa2SChangpeng Liu     config->size_max = 65536;
540406d2aa2SChangpeng Liu     config->seg_max = 128 - 2;
541406d2aa2SChangpeng Liu     config->min_io_size = 1;
542406d2aa2SChangpeng Liu     config->opt_io_size = 1;
543406d2aa2SChangpeng Liu     config->num_queues = 1;
544caa1ee43SChangpeng Liu     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
545caa1ee43SChangpeng Liu     config->max_discard_sectors = 32768;
546caa1ee43SChangpeng Liu     config->max_discard_seg = 1;
547caa1ee43SChangpeng Liu     config->discard_sector_alignment = config->blk_size >> 9;
548caa1ee43SChangpeng Liu     config->max_write_zeroes_sectors = 32768;
549caa1ee43SChangpeng Liu     config->max_write_zeroes_seg = 1;
550caa1ee43SChangpeng Liu     #endif
551406d2aa2SChangpeng Liu }
552406d2aa2SChangpeng Liu 
553406d2aa2SChangpeng Liu static VubDev *
vub_new(char * blk_file)554406d2aa2SChangpeng Liu vub_new(char *blk_file)
555406d2aa2SChangpeng Liu {
556406d2aa2SChangpeng Liu     VubDev *vdev_blk;
557406d2aa2SChangpeng Liu 
558406d2aa2SChangpeng Liu     vdev_blk = g_new0(VubDev, 1);
559406d2aa2SChangpeng Liu     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
560406d2aa2SChangpeng Liu     vdev_blk->blk_fd = vub_open(blk_file, 0);
561406d2aa2SChangpeng Liu     if (vdev_blk->blk_fd  < 0) {
562406d2aa2SChangpeng Liu         fprintf(stderr, "Error to open block device %s\n", blk_file);
563406d2aa2SChangpeng Liu         vub_free(vdev_blk);
564406d2aa2SChangpeng Liu         return NULL;
565406d2aa2SChangpeng Liu     }
56625b1d45aSChangpeng Liu     vdev_blk->enable_ro = false;
567406d2aa2SChangpeng Liu     vdev_blk->blkcfg.wce = 0;
568406d2aa2SChangpeng Liu     vdev_blk->blk_name = blk_file;
569406d2aa2SChangpeng Liu 
570406d2aa2SChangpeng Liu     /* fill virtio_blk_config with block parameters */
571406d2aa2SChangpeng Liu     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
572406d2aa2SChangpeng Liu 
573406d2aa2SChangpeng Liu     return vdev_blk;
574406d2aa2SChangpeng Liu }
575406d2aa2SChangpeng Liu 
5766620801fSMicky Yun Chan static int opt_fdnum = -1;
5776620801fSMicky Yun Chan static char *opt_socket_path;
5786620801fSMicky Yun Chan static char *opt_blk_file;
5796620801fSMicky Yun Chan static gboolean opt_print_caps;
5806620801fSMicky Yun Chan static gboolean opt_read_only;
5816620801fSMicky Yun Chan 
5826620801fSMicky Yun Chan static GOptionEntry entries[] = {
5836620801fSMicky Yun Chan     { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
5846620801fSMicky Yun Chan       "Print capabilities", NULL },
5856620801fSMicky Yun Chan     { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
5866620801fSMicky Yun Chan       "Use inherited fd socket", "FDNUM" },
5876620801fSMicky Yun Chan     { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
5886620801fSMicky Yun Chan       "Use UNIX socket path", "PATH" },
5896620801fSMicky Yun Chan     {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
5906620801fSMicky Yun Chan      "block device or file path", "PATH"},
5916620801fSMicky Yun Chan     { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
592096b778fSStefan Hajnoczi       "Enable read-only", NULL },
593096b778fSStefan Hajnoczi     { NULL, },
5946620801fSMicky Yun Chan };
5956620801fSMicky Yun Chan 
main(int argc,char ** argv)596406d2aa2SChangpeng Liu int main(int argc, char **argv)
597406d2aa2SChangpeng Liu {
598406d2aa2SChangpeng Liu     int lsock = -1, csock = -1;
599406d2aa2SChangpeng Liu     VubDev *vdev_blk = NULL;
6006620801fSMicky Yun Chan     GError *error = NULL;
6016620801fSMicky Yun Chan     GOptionContext *context;
602406d2aa2SChangpeng Liu 
6036620801fSMicky Yun Chan     context = g_option_context_new(NULL);
6046620801fSMicky Yun Chan     g_option_context_add_main_entries(context, entries, NULL);
6056620801fSMicky Yun Chan     if (!g_option_context_parse(context, &argc, &argv, &error)) {
6066620801fSMicky Yun Chan         g_printerr("Option parsing failed: %s\n", error->message);
6076620801fSMicky Yun Chan         exit(EXIT_FAILURE);
608406d2aa2SChangpeng Liu     }
6096620801fSMicky Yun Chan     if (opt_print_caps) {
6106620801fSMicky Yun Chan         g_print("{\n");
6116620801fSMicky Yun Chan         g_print("  \"type\": \"block\",\n");
6126620801fSMicky Yun Chan         g_print("  \"features\": [\n");
6136620801fSMicky Yun Chan         g_print("    \"read-only\",\n");
6146620801fSMicky Yun Chan         g_print("    \"blk-file\"\n");
6156620801fSMicky Yun Chan         g_print("  ]\n");
6166620801fSMicky Yun Chan         g_print("}\n");
6176620801fSMicky Yun Chan         exit(EXIT_SUCCESS);
618406d2aa2SChangpeng Liu     }
619406d2aa2SChangpeng Liu 
6206620801fSMicky Yun Chan     if (!opt_blk_file) {
6216620801fSMicky Yun Chan         g_print("%s\n", g_option_context_get_help(context, true, NULL));
6226620801fSMicky Yun Chan         exit(EXIT_FAILURE);
623406d2aa2SChangpeng Liu     }
624406d2aa2SChangpeng Liu 
6256620801fSMicky Yun Chan     if (opt_socket_path) {
6266620801fSMicky Yun Chan         lsock = unix_sock_new(opt_socket_path);
627406d2aa2SChangpeng Liu         if (lsock < 0) {
6286620801fSMicky Yun Chan             exit(EXIT_FAILURE);
6296620801fSMicky Yun Chan         }
6306620801fSMicky Yun Chan     } else if (opt_fdnum < 0) {
6316620801fSMicky Yun Chan         g_print("%s\n", g_option_context_get_help(context, true, NULL));
6326620801fSMicky Yun Chan         exit(EXIT_FAILURE);
6336620801fSMicky Yun Chan     } else {
6346620801fSMicky Yun Chan         lsock = opt_fdnum;
635406d2aa2SChangpeng Liu     }
636406d2aa2SChangpeng Liu 
6376620801fSMicky Yun Chan     csock = accept(lsock, NULL, NULL);
638406d2aa2SChangpeng Liu     if (csock < 0) {
6396620801fSMicky Yun Chan         g_printerr("Accept error %s\n", strerror(errno));
6406620801fSMicky Yun Chan         exit(EXIT_FAILURE);
641406d2aa2SChangpeng Liu     }
642406d2aa2SChangpeng Liu 
6436620801fSMicky Yun Chan     vdev_blk = vub_new(opt_blk_file);
644406d2aa2SChangpeng Liu     if (!vdev_blk) {
6456620801fSMicky Yun Chan         exit(EXIT_FAILURE);
646406d2aa2SChangpeng Liu     }
6476620801fSMicky Yun Chan     if (opt_read_only) {
64825b1d45aSChangpeng Liu         vdev_blk->enable_ro = true;
64925b1d45aSChangpeng Liu     }
650406d2aa2SChangpeng Liu 
6516f5fd837SStefan Hajnoczi     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
6526f5fd837SStefan Hajnoczi                   vub_panic_cb, &vub_iface)) {
6536620801fSMicky Yun Chan         g_printerr("Failed to initialize libvhost-user-glib\n");
6546620801fSMicky Yun Chan         exit(EXIT_FAILURE);
6556f5fd837SStefan Hajnoczi     }
656406d2aa2SChangpeng Liu 
657406d2aa2SChangpeng Liu     g_main_loop_run(vdev_blk->loop);
6586620801fSMicky Yun Chan     g_main_loop_unref(vdev_blk->loop);
6596620801fSMicky Yun Chan     g_option_context_free(context);
660406d2aa2SChangpeng Liu     vug_deinit(&vdev_blk->parent);
661406d2aa2SChangpeng Liu     vub_free(vdev_blk);
662406d2aa2SChangpeng Liu     if (csock >= 0) {
663406d2aa2SChangpeng Liu         close(csock);
664406d2aa2SChangpeng Liu     }
665406d2aa2SChangpeng Liu     if (lsock >= 0) {
666406d2aa2SChangpeng Liu         close(lsock);
667406d2aa2SChangpeng Liu     }
6686620801fSMicky Yun Chan     g_free(opt_socket_path);
6696620801fSMicky Yun Chan     g_free(opt_blk_file);
670406d2aa2SChangpeng Liu 
671406d2aa2SChangpeng Liu     return 0;
672406d2aa2SChangpeng Liu }
673