/*
 * Sharing QEMU block devices via vhost-user protocol
 *
 * Parts of the code based on nbd/server.c.
 *
 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "block/block.h"
#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
#include "standard-headers/linux/virtio_blk.h"
#include "qemu/vhost-user-server.h"
#include "vhost-user-blk-server.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "util/block-helpers.h"
#include "virtio-blk-handler.h"

enum {
    VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
};

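/* in-flight request popped from a virtqueue */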
typedef struct VuBlkReq {
    VuVirtqElement elem;
    VuServer *server;
    struct VuVirtq *vq;
} VuBlkReq;

/* vhost user block device */
typedef struct {
    BlockExport export;
    VuServer vu_server;
    VirtioBlkHandler handler;
    QIOChannelSocket *sioc;
    struct virtio_blk_config blkcfg;
} VuBlkExport;

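/* Push the completed request onto the used ring, notify the guest and free it */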
static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
{
    VuDev *vu_dev = &req->server->vu_dev;

    vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
    vu_queue_notify(vu_dev, req->vq);

    free(req);
}

/*
 * Called with the server's in_flight counter increased; it must be decreased
 * before returning.
 */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
    VuBlkReq *req = opaque;
    VuServer *server = req->server;
    VuVirtqElement *elem = &req->elem;
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    VirtioBlkHandler *handler = &vexp->handler;
    struct iovec *in_iov = elem->in_sg;
    struct iovec *out_iov = elem->out_sg;
    unsigned in_num = elem->in_num;
    unsigned out_num = elem->out_num;
    int in_len;

    in_len = virtio_blk_process_req(handler, in_iov, out_iov,
                                    in_num, out_num);
    if (in_len < 0) {
        free(req);
        vhost_user_server_dec_in_flight(server);
        return;
    }

    vu_blk_req_complete(req, in_len);
    vhost_user_server_dec_in_flight(server);
}

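/*
 * Virtqueue handler: pop all available requests and hand each one to its own
 * coroutine, bumping the server's in_flight counter for every request.
 */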
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuVirtq *vq = vu_get_queue(vu_dev, idx);

    while (1) {
        VuBlkReq *req;

        req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
        if (!req) {
            break;
        }

        req->server = server;
        req->vq = vq;

        Coroutine *co =
            qemu_coroutine_create(vu_blk_virtio_process_req, req);

        vhost_user_server_inc_in_flight(server);
        qemu_coroutine_enter(co);
    }
}

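/* Install or clear the virtqueue handler when a vring starts or stops */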
static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
}

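/* Virtio-blk and vhost-user feature bits offered to the front-end */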
static uint64_t vu_blk_get_features(VuDev *dev)
{
    uint64_t features;
    VuServer *server = container_of(dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_BLK_F_MQ |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VIRTIO_RING_F_INDIRECT_DESC |
               1ull << VIRTIO_RING_F_EVENT_IDX |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (!vexp->handler.writable) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t vu_blk_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

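/* Copy the first len bytes of the virtio-blk config space to the front-end */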
static int
vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    if (len > sizeof(struct virtio_blk_config)) {
        return -1;
    }

    memcpy(config, &vexp->blkcfg, len);
    return 0;
}

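/*
 * Handle a config space write from the front-end.  Only the write cache
 * enable byte (wce) may be changed.
 */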
static int
vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
                    uint32_t offset, uint32_t size, uint32_t flags)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    uint8_t wce;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
        return -EINVAL;
    }

    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -EINVAL;
    }

    wce = *data;
    vexp->blkcfg.wce = wce;
    blk_set_enable_write_cache(vexp->export.blk, wce);
    return 0;
}

/*
 * When the client disconnects, it sends a VHOST_USER_NONE request, and
 * vu_process_message() would simply call exit(), causing the VM to exit
 * abruptly.  To avoid this, handle the VHOST_USER_NONE request here, before
 * vu_process_message() sees it.
 */
static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
    if (vmsg->request == VHOST_USER_NONE) {
        dev->panic(dev, "disconnect");
        return true;
    }
    return false;
}

static const VuDevIface vu_blk_iface = {
    .get_features          = vu_blk_get_features,
    .queue_set_started     = vu_blk_queue_set_started,
    .get_protocol_features = vu_blk_get_protocol_features,
    .get_config            = vu_blk_get_config,
    .set_config            = vu_blk_set_config,
    .process_msg           = vu_blk_process_msg,
};

static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual attach will happen in vu_blk_drained_end() and we just
     * restore ctx here.
     */
    vexp->export.ctx = ctx;
}

static void blk_aio_detach(void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual detach already happened in vu_blk_drained_begin() but from
     * this point on we must not access ctx anymore.
     */
    vexp->export.ctx = NULL;
}

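/* Fill in the virtio-blk config space advertised to the front-end */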
static void
vu_blk_initialize_config(BlockDriverState *bs,
                         struct virtio_blk_config *config,
                         uint32_t blk_size,
                         uint16_t num_queues)
{
    config->capacity =
        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
    config->blk_size = cpu_to_le32(blk_size);
    config->size_max = cpu_to_le32(0);
    config->seg_max = cpu_to_le32(128 - 2);
    config->min_io_size = cpu_to_le16(1);
    config->opt_io_size = cpu_to_le32(1);
    config->num_queues = cpu_to_le16(num_queues);
    config->max_discard_sectors =
        cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
    config->max_discard_seg = cpu_to_le32(1);
    config->discard_sector_alignment =
        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
    config->max_write_zeroes_sectors
        = cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
    config->max_write_zeroes_seg = cpu_to_le32(1);
}

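/* Stop the vhost-user server when the export is asked to shut down */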
static void vu_blk_exp_request_shutdown(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    vhost_user_server_stop(&vexp->vu_server);
}

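/*
 * Resize callback: update the advertised capacity and send a config change
 * notification to the front-end.
 */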
static void vu_blk_exp_resize(void *opaque)
{
    VuBlkExport *vexp = opaque;
    BlockDriverState *bs = blk_bs(vexp->handler.blk);
    int64_t new_size = bdrv_getlength(bs);

    if (new_size < 0) {
        error_printf("Failed to get length of block node '%s'",
                     bdrv_get_node_name(bs));
        return;
    }

    vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);

    vu_config_change_msg(&vexp->vu_server.vu_dev);
}

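/*
 * The block layer is draining: detach from the AioContext so that no new
 * vhost-user messages or requests are processed until the drained section
 * ends.
 */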
static void vu_blk_drained_begin(void *opaque)
{
    VuBlkExport *vexp = opaque;

    vexp->vu_server.quiescing = true;
    vhost_user_server_detach_aio_context(&vexp->vu_server);
}

static void vu_blk_drained_end(void *opaque)
{
    VuBlkExport *vexp = opaque;

    vexp->vu_server.quiescing = false;
    vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
}

/*
 * Ensures that bdrv_drained_begin() waits until in-flight requests complete
 * and the server->co_trip coroutine has terminated. It will be restarted in
 * vhost_user_server_attach_aio_context().
 */
static bool vu_blk_drained_poll(void *opaque)
{
    VuBlkExport *vexp = opaque;
    VuServer *server = &vexp->vu_server;

    return server->co_trip || vhost_user_server_has_in_flight(server);
}

static const BlockDevOps vu_blk_dev_ops = {
    .drained_begin = vu_blk_drained_begin,
    .drained_end   = vu_blk_drained_end,
    .drained_poll  = vu_blk_drained_poll,
    .resize_cb = vu_blk_exp_resize,
};

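/*
 * Create the export: validate the options, initialise the virtio-blk config
 * space and start the vhost-user server listening on the given socket
 * address.
 */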
static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
                             Error **errp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
    BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
    uint64_t logical_block_size;
    uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;

    vexp->blkcfg.wce = 0;

    if (vu_opts->has_logical_block_size) {
        logical_block_size = vu_opts->logical_block_size;
    } else {
        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
    }
    if (!check_block_size("logical-block-size", logical_block_size, errp)) {
        return -EINVAL;
    }

    if (vu_opts->has_num_queues) {
        num_queues = vu_opts->num_queues;
    }
    if (num_queues == 0) {
        error_setg(errp, "num-queues must be greater than 0");
        return -EINVAL;
    }
    vexp->handler.blk = exp->blk;
    vexp->handler.serial = g_strdup("vhost_user_blk");
    vexp->handler.logical_block_size = logical_block_size;
    vexp->handler.writable = opts->writable;

    vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
                             logical_block_size, num_queues);

    blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                 vexp);

    blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);

    if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
                                 num_queues, &vu_blk_iface, errp)) {
        blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                        blk_aio_detach, vexp);
        g_free(vexp->handler.serial);
        return -EADDRNOTAVAIL;
    }

    return 0;
}

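/* Undo vu_blk_exp_create(): remove the notifier and free the serial string */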
static void vu_blk_exp_delete(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vexp);
    g_free(vexp->handler.serial);
}

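/*
 * Exports of this type are typically created via the QMP block-export-add
 * command (options defined in qapi/block-export.json).  A sketch of a
 * possible invocation; the node name and socket path below are placeholders:
 *
 *   { "execute": "block-export-add",
 *     "arguments": { "type": "vhost-user-blk", "id": "vub0",
 *                    "node-name": "disk0",
 *                    "addr": { "type": "unix",
 *                              "path": "/tmp/vhost-user-blk.sock" } } }
 */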
const BlockExportDriver blk_exp_vhost_user_blk = {
    .type               = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
    .instance_size      = sizeof(VuBlkExport),
    .create             = vu_blk_exp_create,
    .delete             = vu_blk_exp_delete,
    .request_shutdown   = vu_blk_exp_request_shutdown,
};