/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"


struct virtio_blk_inhdr {
    unsigned char status;
};

/* vhost user block device */
typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
} VubDev;

typedef struct VubReq {
    VuVirtqElement *elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    VubDev *vdev_blk;
    struct VuVirtq *vq;
} VubReq;

/* see util/iov.c */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}

static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    g_main_loop_quit(vdev_blk->loop);
}

static void vub_req_complete(VubReq *req)
{
    VugDev *gdev = &req->vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem,
                  req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    if (req->elem) {
        free(req->elem);
    }

    g_free(req);
}

static int vub_open(const char *file_name, bool wce)
{
    int fd;
    int flags = O_RDWR;

    if (!wce) {
        /* bypass the host page cache while the write cache is disabled */
        flags |= O_DIRECT;
    }

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}

static ssize_t
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Read IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static ssize_t
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Write IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static void
vub_flush(VubReq *req)
{
    VubDev *vdev_blk = req->vdev_blk;

    fdatasync(vdev_blk->blk_fd);
}
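/*
 * A virtio-blk request, as popped from the virtqueue, consists of a 16-byte
 * virtio_blk_outhdr (type, ioprio, sector) in the first out descriptor,
 * followed by the data buffers, followed by a one-byte virtio_blk_inhdr
 * status that the device writes back.  This is why vub_req_complete() above
 * pushes req->size + 1 bytes as "used".
 */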
static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /*
     * VIRTIO_F_ANY_LAYOUT is not negotiated, so assume the virtio 1.0
     * layout: the request header fills the first out descriptor and the
     * status byte fills the last in descriptor.
     */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
        if (is_write) {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        } else {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH: {
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        vub_req_complete(req);
        break;
    }
    default: {
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}

static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    VuVirtq *vq;
    int ret;

    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
        fprintf(stderr, "VQ Index out of range: %d\n", idx);
        vub_panic_cb(vu_dev, NULL);
        return;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    /* drain the virtqueue until no more requests are available */
    while (1) {
        ret = vub_virtio_process_req(vdev_blk, vq);
        if (ret) {
            break;
        }
    }
}

static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ?
                         vub_process_vq : NULL);
}
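/*
 * Device feature bits offered to the driver.  VHOST_USER_F_PROTOCOL_FEATURES
 * must be offered so that vhost-user protocol features are negotiated as
 * well; VHOST_USER_PROTOCOL_F_CONFIG (below) is what lets the master forward
 * virtio config space accesses to vub_get_config()/vub_set_config().
 */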
static uint64_t
vub_get_features(VuDev *dev)
{
    uint64_t features;
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (vdev_blk->enable_ro) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t
vub_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

static int
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);

    return 0;
}

static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    /* only the write-cache-enable byte may be changed */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* nothing to do, the write cache setting is unchanged */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    /* reopen the backing file with or without O_DIRECT to match the policy */
    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n",
                vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}

static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};

static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock <= 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}

static void vub_free(struct VubDev *vdev_blk)
{
    if (!vdev_blk) {
        return;
    }

    g_main_loop_unref(vdev_blk->loop);
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
    }
    g_free(vdev_blk);
}

static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}
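/*
 * Fill in the virtio_blk_config returned to the driver via vub_get_config().
 * The virtio specification defines "capacity" in units of 512-byte sectors
 * regardless of the logical block size, hence the shift by 9 below.
 */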
static void
vub_initialize_config(int fd, struct virtio_blk_config *config)
{
    off64_t capacity;

    capacity = lseek64(fd, 0, SEEK_END);
    config->capacity = capacity >> 9;
    config->blk_size = vub_get_blocksize(fd);
    config->size_max = 65536;
    /* queue size of 128, minus the descriptors used for header and status */
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
}

static VubDev *
vub_new(char *blk_file)
{
    VubDev *vdev_blk;

    vdev_blk = g_new0(VubDev, 1);
    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    vdev_blk->blk_fd = vub_open(blk_file, false);
    if (vdev_blk->blk_fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n", blk_file);
        vub_free(vdev_blk);
        return NULL;
    }
    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

    /* fill virtio_blk_config with block parameters */
    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);

    return vdev_blk;
}

int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s -b <block device or file>"
                   " -s <UNIX domain socket> [ -r (read-only) ] [ -h ]\n",
                   argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s -b <block device or file>"
               " -s <UNIX domain socket> [ -r (read-only) ] [ -h ]\n",
               argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    csock = accept(lsock, (void *)0, (void *)0);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}
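/*
 * Example invocation (illustrative only; the paths are placeholders and the
 * exact QEMU command line depends on the machine type and QEMU version):
 *
 *   ./vhost-user-blk -b disk.img -s /tmp/vhost-user-blk.sock
 *
 *   qemu-system-x86_64 -m 1G \
 *       -object memory-backend-file,id=mem,size=1G,mem-path=/dev/shm,share=on \
 *       -numa node,memdev=mem \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0 \
 *       ...
 */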