1 /* 2 * Export QEMU block device via VDUSE 3 * 4 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. 5 * 6 * Author: 7 * Xie Yongji <xieyongji@bytedance.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 */ 12 13 #include <sys/eventfd.h> 14 15 #include "qemu/osdep.h" 16 #include "qapi/error.h" 17 #include "block/export.h" 18 #include "qemu/error-report.h" 19 #include "util/block-helpers.h" 20 #include "subprojects/libvduse/libvduse.h" 21 #include "virtio-blk-handler.h" 22 23 #include "standard-headers/linux/virtio_blk.h" 24 25 #define VDUSE_DEFAULT_NUM_QUEUE 1 26 #define VDUSE_DEFAULT_QUEUE_SIZE 256 27 28 typedef struct VduseBlkExport { 29 BlockExport export; 30 VirtioBlkHandler handler; 31 VduseDev *dev; 32 uint16_t num_queues; 33 char *recon_file; 34 unsigned int inflight; 35 } VduseBlkExport; 36 37 typedef struct VduseBlkReq { 38 VduseVirtqElement elem; 39 VduseVirtq *vq; 40 } VduseBlkReq; 41 42 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp) 43 { 44 vblk_exp->inflight++; 45 } 46 47 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp) 48 { 49 if (--vblk_exp->inflight == 0) { 50 aio_wait_kick(); 51 } 52 } 53 54 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len) 55 { 56 vduse_queue_push(req->vq, &req->elem, in_len); 57 vduse_queue_notify(req->vq); 58 59 free(req); 60 } 61 62 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque) 63 { 64 VduseBlkReq *req = opaque; 65 VduseVirtq *vq = req->vq; 66 VduseDev *dev = vduse_queue_get_dev(vq); 67 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); 68 VirtioBlkHandler *handler = &vblk_exp->handler; 69 VduseVirtqElement *elem = &req->elem; 70 struct iovec *in_iov = elem->in_sg; 71 struct iovec *out_iov = elem->out_sg; 72 unsigned in_num = elem->in_num; 73 unsigned out_num = elem->out_num; 74 int in_len; 75 76 in_len = virtio_blk_process_req(handler, in_iov, 77 out_iov, in_num, out_num); 78 if (in_len < 0) { 79 free(req); 80 return; 81 } 82 83 vduse_blk_req_complete(req, in_len); 84 vduse_blk_inflight_dec(vblk_exp); 85 } 86 87 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq) 88 { 89 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); 90 91 while (1) { 92 VduseBlkReq *req; 93 94 req = vduse_queue_pop(vq, sizeof(VduseBlkReq)); 95 if (!req) { 96 break; 97 } 98 req->vq = vq; 99 100 Coroutine *co = 101 qemu_coroutine_create(vduse_blk_virtio_process_req, req); 102 103 vduse_blk_inflight_inc(vblk_exp); 104 qemu_coroutine_enter(co); 105 } 106 } 107 108 static void on_vduse_vq_kick(void *opaque) 109 { 110 VduseVirtq *vq = opaque; 111 VduseDev *dev = vduse_queue_get_dev(vq); 112 int fd = vduse_queue_get_fd(vq); 113 eventfd_t kick_data; 114 115 if (eventfd_read(fd, &kick_data) == -1) { 116 error_report("failed to read data from eventfd"); 117 return; 118 } 119 120 vduse_blk_vq_handler(dev, vq); 121 } 122 123 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq) 124 { 125 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); 126 127 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), 128 true, on_vduse_vq_kick, NULL, NULL, NULL, vq); 129 /* Make sure we don't miss any kick afer reconnecting */ 130 eventfd_write(vduse_queue_get_fd(vq), 1); 131 } 132 133 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq) 134 { 135 VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev); 136 137 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), 138 true, NULL, NULL, NULL, NULL, NULL); 139 } 140 141 static const VduseOps vduse_blk_ops = { 142 .enable_queue = vduse_blk_enable_queue, 143 .disable_queue = vduse_blk_disable_queue, 144 }; 145 146 static void on_vduse_dev_kick(void *opaque) 147 { 148 VduseDev *dev = opaque; 149 150 vduse_dev_handler(dev); 151 } 152 153 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx) 154 { 155 int i; 156 157 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), 158 true, on_vduse_dev_kick, NULL, NULL, NULL, 159 vblk_exp->dev); 160 161 for (i = 0; i < vblk_exp->num_queues; i++) { 162 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); 163 int fd = vduse_queue_get_fd(vq); 164 165 if (fd < 0) { 166 continue; 167 } 168 aio_set_fd_handler(vblk_exp->export.ctx, fd, true, 169 on_vduse_vq_kick, NULL, NULL, NULL, vq); 170 } 171 } 172 173 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp) 174 { 175 int i; 176 177 for (i = 0; i < vblk_exp->num_queues; i++) { 178 VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i); 179 int fd = vduse_queue_get_fd(vq); 180 181 if (fd < 0) { 182 continue; 183 } 184 aio_set_fd_handler(vblk_exp->export.ctx, fd, 185 true, NULL, NULL, NULL, NULL, NULL); 186 } 187 aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev), 188 true, NULL, NULL, NULL, NULL, NULL); 189 190 AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0); 191 } 192 193 194 static void blk_aio_attached(AioContext *ctx, void *opaque) 195 { 196 VduseBlkExport *vblk_exp = opaque; 197 198 vblk_exp->export.ctx = ctx; 199 vduse_blk_attach_ctx(vblk_exp, ctx); 200 } 201 202 static void blk_aio_detach(void *opaque) 203 { 204 VduseBlkExport *vblk_exp = opaque; 205 206 vduse_blk_detach_ctx(vblk_exp); 207 vblk_exp->export.ctx = NULL; 208 } 209 210 static void vduse_blk_resize(void *opaque) 211 { 212 BlockExport *exp = opaque; 213 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); 214 struct virtio_blk_config config; 215 216 config.capacity = 217 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS); 218 vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity), 219 offsetof(struct virtio_blk_config, capacity), 220 (char *)&config.capacity); 221 } 222 223 static const BlockDevOps vduse_block_ops = { 224 .resize_cb = vduse_blk_resize, 225 }; 226 227 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, 228 Error **errp) 229 { 230 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); 231 BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk; 232 uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE; 233 uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE; 234 uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE; 235 Error *local_err = NULL; 236 struct virtio_blk_config config = { 0 }; 237 uint64_t features; 238 int i, ret; 239 240 if (vblk_opts->has_num_queues) { 241 num_queues = vblk_opts->num_queues; 242 if (num_queues == 0) { 243 error_setg(errp, "num-queues must be greater than 0"); 244 return -EINVAL; 245 } 246 } 247 248 if (vblk_opts->has_queue_size) { 249 queue_size = vblk_opts->queue_size; 250 if (queue_size <= 2 || !is_power_of_2(queue_size) || 251 queue_size > VIRTQUEUE_MAX_SIZE) { 252 error_setg(errp, "queue-size is invalid"); 253 return -EINVAL; 254 } 255 } 256 257 if (vblk_opts->has_logical_block_size) { 258 logical_block_size = vblk_opts->logical_block_size; 259 check_block_size(exp->id, "logical-block-size", logical_block_size, 260 &local_err); 261 if (local_err) { 262 error_propagate(errp, local_err); 263 return -EINVAL; 264 } 265 } 266 vblk_exp->num_queues = num_queues; 267 vblk_exp->handler.blk = exp->blk; 268 vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ? 269 vblk_opts->serial : ""); 270 vblk_exp->handler.logical_block_size = logical_block_size; 271 vblk_exp->handler.writable = opts->writable; 272 273 config.capacity = 274 cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS); 275 config.seg_max = cpu_to_le32(queue_size - 2); 276 config.min_io_size = cpu_to_le16(1); 277 config.opt_io_size = cpu_to_le32(1); 278 config.num_queues = cpu_to_le16(num_queues); 279 config.blk_size = cpu_to_le32(logical_block_size); 280 config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS); 281 config.max_discard_seg = cpu_to_le32(1); 282 config.discard_sector_alignment = 283 cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS); 284 config.max_write_zeroes_sectors = 285 cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS); 286 config.max_write_zeroes_seg = cpu_to_le32(1); 287 288 features = vduse_get_virtio_features() | 289 (1ULL << VIRTIO_BLK_F_SEG_MAX) | 290 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | 291 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | 292 (1ULL << VIRTIO_BLK_F_FLUSH) | 293 (1ULL << VIRTIO_BLK_F_DISCARD) | 294 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES); 295 296 if (num_queues > 1) { 297 features |= 1ULL << VIRTIO_BLK_F_MQ; 298 } 299 if (!opts->writable) { 300 features |= 1ULL << VIRTIO_BLK_F_RO; 301 } 302 303 vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0, 304 features, num_queues, 305 sizeof(struct virtio_blk_config), 306 (char *)&config, &vduse_blk_ops, 307 vblk_exp); 308 if (!vblk_exp->dev) { 309 error_setg(errp, "failed to create vduse device"); 310 ret = -ENOMEM; 311 goto err_dev; 312 } 313 314 vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s", 315 g_get_tmp_dir(), vblk_opts->name); 316 if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) { 317 error_setg(errp, "failed to set reconnect log file"); 318 ret = -EINVAL; 319 goto err; 320 } 321 322 for (i = 0; i < num_queues; i++) { 323 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size); 324 } 325 326 aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true, 327 on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev); 328 329 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, 330 vblk_exp); 331 332 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp); 333 334 return 0; 335 err: 336 vduse_dev_destroy(vblk_exp->dev); 337 g_free(vblk_exp->recon_file); 338 err_dev: 339 g_free(vblk_exp->handler.serial); 340 return ret; 341 } 342 343 static void vduse_blk_exp_delete(BlockExport *exp) 344 { 345 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); 346 int ret; 347 348 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, 349 vblk_exp); 350 blk_set_dev_ops(exp->blk, NULL, NULL); 351 ret = vduse_dev_destroy(vblk_exp->dev); 352 if (ret != -EBUSY) { 353 unlink(vblk_exp->recon_file); 354 } 355 g_free(vblk_exp->recon_file); 356 g_free(vblk_exp->handler.serial); 357 } 358 359 static void vduse_blk_exp_request_shutdown(BlockExport *exp) 360 { 361 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); 362 363 aio_context_acquire(vblk_exp->export.ctx); 364 vduse_blk_detach_ctx(vblk_exp); 365 aio_context_acquire(vblk_exp->export.ctx); 366 } 367 368 const BlockExportDriver blk_exp_vduse_blk = { 369 .type = BLOCK_EXPORT_TYPE_VDUSE_BLK, 370 .instance_size = sizeof(VduseBlkExport), 371 .create = vduse_blk_exp_create, 372 .delete = vduse_blk_exp_delete, 373 .request_shutdown = vduse_blk_exp_request_shutdown, 374 }; 375