/*
 * Sharing QEMU block devices via vhost-user protocol
 *
 * Parts of the code based on nbd/server.c.
 *
 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "block/block.h"
#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
#include "standard-headers/linux/virtio_blk.h"
#include "qemu/vhost-user-server.h"
#include "vhost-user-blk-server.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "util/block-helpers.h"
#include "virtio-blk-handler.h"

enum {
    VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
};

typedef struct VuBlkReq {
    VuVirtqElement elem;
    VuServer *server;
    struct VuVirtq *vq;
} VuBlkReq;

/* vhost user block device */
typedef struct {
    BlockExport export;
    VuServer vu_server;
    VirtioBlkHandler handler;
    QIOChannelSocket *sioc;
    struct virtio_blk_config blkcfg;
} VuBlkExport;

static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
{
    VuDev *vu_dev = &req->server->vu_dev;

    vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
    vu_queue_notify(vu_dev, req->vq);

    free(req);
}

/*
 * Called with the server's in_flight counter incremented; it must be
 * decremented before returning.
 */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
    VuBlkReq *req = opaque;
    VuServer *server = req->server;
    VuVirtqElement *elem = &req->elem;
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    VirtioBlkHandler *handler = &vexp->handler;
    struct iovec *in_iov = elem->in_sg;
    struct iovec *out_iov = elem->out_sg;
    unsigned in_num = elem->in_num;
    unsigned out_num = elem->out_num;
    int in_len;

    in_len = virtio_blk_process_req(handler, in_iov, out_iov,
                                    in_num, out_num);
    if (in_len < 0) {
        free(req);
        vhost_user_server_dec_in_flight(server);
        return;
    }

    vu_blk_req_complete(req, in_len);
    vhost_user_server_dec_in_flight(server);
}

static void vu_blk_process_vq(VuDev *vu_dev, int idx)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuVirtq *vq = vu_get_queue(vu_dev, idx);

    while (1) {
        VuBlkReq *req;

        req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
        if (!req) {
            break;
        }

        req->server = server;
        req->vq = vq;

        Coroutine *co =
            qemu_coroutine_create(vu_blk_virtio_process_req, req);

        vhost_user_server_inc_in_flight(server);
        qemu_coroutine_enter(co);
    }
}

static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
}
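
/*
 * For reference, a sketch of the virtqueue element layout that
 * vu_blk_virtio_process_req() hands to virtio_blk_process_req() (the actual
 * parsing lives in virtio-blk-handler.c; guests may split these parts across
 * descriptors differently):
 *
 *   out_sg[0]      struct virtio_blk_outhdr { type, ioprio, sector }
 *   out_sg[1..]    write payload, if any (driver -> device)
 *   in_sg[0..m-1]  read payload, if any (device -> driver)
 *   in_sg[m]       uint8_t status (VIRTIO_BLK_S_OK, ...)
 *
 * The in_len passed to vu_blk_req_complete() counts only the bytes the
 * device wrote into the in_sg buffers, i.e. any read payload plus the
 * status byte.
 */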

static uint64_t vu_blk_get_features(VuDev *dev)
{
    uint64_t features;
    VuServer *server = container_of(dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_BLK_F_MQ |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VIRTIO_RING_F_INDIRECT_DESC |
               1ull << VIRTIO_RING_F_EVENT_IDX |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (!vexp->handler.writable) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t vu_blk_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

static int
vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);

    if (len > sizeof(struct virtio_blk_config)) {
        return -1;
    }

    memcpy(config, &vexp->blkcfg, len);
    return 0;
}

static int
vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
                  uint32_t offset, uint32_t size, uint32_t flags)
{
    VuServer *server = container_of(vu_dev, VuServer, vu_dev);
    VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
    uint8_t wce;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
        return -EINVAL;
    }

    /* only the write cache enable (wce) byte is guest-writable */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -EINVAL;
    }

    wce = *data;
    vexp->blkcfg.wce = wce;
    blk_set_enable_write_cache(vexp->export.blk, wce);
    return 0;
}
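
/*
 * For reference, with VIRTIO_BLK_F_CONFIG_WCE negotiated a Linux guest can
 * trigger vu_blk_set_config() at runtime by switching the cache mode in
 * sysfs (the device name "vda" is just an example):
 *
 *   # echo "write through" > /sys/block/vda/cache_type
 *
 * The guest driver then performs a config-space write of the wce byte,
 * which the vhost-user frontend forwards to this backend as a
 * VHOST_USER_SET_CONFIG message.
 */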

/*
 * When the client disconnects, it sends a VHOST_USER_NONE request, and
 * vu_process_message() would simply call exit(), causing the process to
 * exit abruptly.
 * To avoid this, handle the VHOST_USER_NONE request here, ahead of
 * vu_process_message().
 */
static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
    if (vmsg->request == VHOST_USER_NONE) {
        dev->panic(dev, "disconnect");
        return true;
    }
    return false;
}

static const VuDevIface vu_blk_iface = {
    .get_features = vu_blk_get_features,
    .queue_set_started = vu_blk_queue_set_started,
    .get_protocol_features = vu_blk_get_protocol_features,
    .get_config = vu_blk_get_config,
    .set_config = vu_blk_set_config,
    .process_msg = vu_blk_process_msg,
};

static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual attach will happen in vu_blk_drained_end() and we just
     * restore ctx here.
     */
    vexp->export.ctx = ctx;
}

static void blk_aio_detach(void *opaque)
{
    VuBlkExport *vexp = opaque;

    /*
     * The actual detach already happened in vu_blk_drained_begin() but from
     * this point on we must not access ctx anymore.
     */
    vexp->export.ctx = NULL;
}

static void
vu_blk_initialize_config(BlockDriverState *bs,
                         struct virtio_blk_config *config,
                         uint32_t blk_size,
                         uint16_t num_queues)
{
    config->capacity =
        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
    config->blk_size = cpu_to_le32(blk_size);
    config->size_max = cpu_to_le32(0);
    config->seg_max = cpu_to_le32(128 - 2);
    config->min_io_size = cpu_to_le16(1);
    config->opt_io_size = cpu_to_le32(1);
    config->num_queues = cpu_to_le16(num_queues);
    config->max_discard_sectors =
        cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
    config->max_discard_seg = cpu_to_le32(1);
    config->discard_sector_alignment =
        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
    config->max_write_zeroes_sectors =
        cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
    config->max_write_zeroes_seg = cpu_to_le32(1);
}

static void vu_blk_exp_request_shutdown(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    vhost_user_server_stop(&vexp->vu_server);
}

static void vu_blk_exp_resize(void *opaque)
{
    VuBlkExport *vexp = opaque;
    BlockDriverState *bs = blk_bs(vexp->handler.blk);
    int64_t new_size = bdrv_getlength(bs);

    if (new_size < 0) {
        error_printf("Failed to get length of block node '%s'\n",
                     bdrv_get_node_name(bs));
        return;
    }

    vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);

    vu_config_change_msg(&vexp->vu_server.vu_dev);
}

/* Called with vexp->export.ctx acquired */
static void vu_blk_drained_begin(void *opaque)
{
    VuBlkExport *vexp = opaque;

    vexp->vu_server.quiescing = true;
    vhost_user_server_detach_aio_context(&vexp->vu_server);
}

/* Called with vexp->export.blk AioContext acquired */
static void vu_blk_drained_end(void *opaque)
{
    VuBlkExport *vexp = opaque;

    vexp->vu_server.quiescing = false;
    vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
}

/*
 * Ensures that bdrv_drained_begin() waits until in-flight requests complete
 * and the server->co_trip coroutine has terminated. It will be restarted in
 * vhost_user_server_attach_aio_context().
 *
 * Called with vexp->export.ctx acquired.
 */
static bool vu_blk_drained_poll(void *opaque)
{
    VuBlkExport *vexp = opaque;
    VuServer *server = &vexp->vu_server;

    return server->co_trip || vhost_user_server_has_in_flight(server);
}
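
/*
 * Taken together, the callbacks above give (roughly) the following drain
 * sequence for the export:
 *
 *   bdrv_drained_begin()
 *     -> .drained_begin: stop watching the vhost-user socket
 *     -> .drained_poll:  loop until co_trip is gone and no requests are
 *                        in flight
 *   ... block graph / AioContext changes happen here ...
 *   bdrv_drained_end()
 *     -> .drained_end:   re-attach the server to the (possibly new) ctx
 */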

static const BlockDevOps vu_blk_dev_ops = {
    .drained_begin = vu_blk_drained_begin,
    .drained_end = vu_blk_drained_end,
    .drained_poll = vu_blk_drained_poll,
    .resize_cb = vu_blk_exp_resize,
};

static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
                             Error **errp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
    BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
    Error *local_err = NULL;
    uint64_t logical_block_size;
    uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;

    vexp->blkcfg.wce = 0;

    if (vu_opts->has_logical_block_size) {
        logical_block_size = vu_opts->logical_block_size;
    } else {
        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
    }
    check_block_size(exp->id, "logical-block-size", logical_block_size,
                     &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    if (vu_opts->has_num_queues) {
        num_queues = vu_opts->num_queues;
    }
    if (num_queues == 0) {
        error_setg(errp, "num-queues must be greater than 0");
        return -EINVAL;
    }

    vexp->handler.blk = exp->blk;
    vexp->handler.serial = g_strdup("vhost_user_blk");
    vexp->handler.logical_block_size = logical_block_size;
    vexp->handler.writable = opts->writable;

    vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
                             logical_block_size, num_queues);

    blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                 vexp);

    blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);

    if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
                                 num_queues, &vu_blk_iface, errp)) {
        blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                        blk_aio_detach, vexp);
        g_free(vexp->handler.serial);
        return -EADDRNOTAVAIL;
    }

    return 0;
}

static void vu_blk_exp_delete(BlockExport *exp)
{
    VuBlkExport *vexp = container_of(exp, VuBlkExport, export);

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                    vexp);
    g_free(vexp->handler.serial);
}

const BlockExportDriver blk_exp_vhost_user_blk = {
    .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
    .instance_size = sizeof(VuBlkExport),
    .create = vu_blk_exp_create,
    .delete = vu_blk_exp_delete,
    .request_shutdown = vu_blk_exp_request_shutdown,
};
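
/*
 * Usage sketch (paths, IDs and node names below are examples, not taken
 * from this file): export a raw image over a vhost-user-blk socket with
 * the storage daemon
 *
 *   qemu-storage-daemon \
 *       --blockdev driver=file,node-name=disk0,filename=disk.img \
 *       --export type=vhost-user-blk,id=exp0,node-name=disk0,\
 *         addr.type=unix,addr.path=/tmp/vhost-user-blk.sock,writable=on
 *
 * and connect a guest to it:
 *
 *   qemu-system-x86_64 ... \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0,num-queues=1
 *
 * Note that vhost-user requires shareable guest memory, e.g.
 * -object memory-backend-memfd,id=mem,size=...,share=on
 * -numa node,memdev=mem.
 */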