1 /* 2 * vhost-user-blk sample application 3 * 4 * Copyright (c) 2017 Intel Corporation. All rights reserved. 5 * 6 * Author: 7 * Changpeng Liu <changpeng.liu@intel.com> 8 * 9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver 10 * implementation by: 11 * Felipe Franciosi <felipe@nutanix.com> 12 * Anthony Liguori <aliguori@us.ibm.com> 13 * 14 * This work is licensed under the terms of the GNU GPL, version 2 only. 15 * See the COPYING file in the top-level directory. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "standard-headers/linux/virtio_blk.h" 20 #include "contrib/libvhost-user/libvhost-user-glib.h" 21 #include "contrib/libvhost-user/libvhost-user.h" 22 23 #if defined(__linux__) 24 #include <linux/fs.h> 25 #include <sys/ioctl.h> 26 #endif 27 28 enum { 29 VHOST_USER_BLK_MAX_QUEUES = 8, 30 }; 31 32 struct virtio_blk_inhdr { 33 unsigned char status; 34 }; 35 36 /* vhost user block device */ 37 typedef struct VubDev { 38 VugDev parent; 39 int blk_fd; 40 struct virtio_blk_config blkcfg; 41 bool enable_ro; 42 char *blk_name; 43 GMainLoop *loop; 44 } VubDev; 45 46 typedef struct VubReq { 47 VuVirtqElement *elem; 48 int64_t sector_num; 49 size_t size; 50 struct virtio_blk_inhdr *in; 51 struct virtio_blk_outhdr *out; 52 VubDev *vdev_blk; 53 struct VuVirtq *vq; 54 } VubReq; 55 56 /* refer util/iov.c */ 57 static size_t vub_iov_size(const struct iovec *iov, 58 const unsigned int iov_cnt) 59 { 60 size_t len; 61 unsigned int i; 62 63 len = 0; 64 for (i = 0; i < iov_cnt; i++) { 65 len += iov[i].iov_len; 66 } 67 return len; 68 } 69 70 static size_t vub_iov_to_buf(const struct iovec *iov, 71 const unsigned int iov_cnt, void *buf) 72 { 73 size_t len; 74 unsigned int i; 75 76 len = 0; 77 for (i = 0; i < iov_cnt; i++) { 78 memcpy(buf + len, iov[i].iov_base, iov[i].iov_len); 79 len += iov[i].iov_len; 80 } 81 return len; 82 } 83 84 static void vub_panic_cb(VuDev *vu_dev, const char *buf) 85 { 86 VugDev *gdev; 87 VubDev *vdev_blk; 88 89 assert(vu_dev); 90 91 gdev = container_of(vu_dev, VugDev, parent); 92 vdev_blk = container_of(gdev, VubDev, parent); 93 if (buf) { 94 g_warning("vu_panic: %s", buf); 95 } 96 97 g_main_loop_quit(vdev_blk->loop); 98 } 99 100 static void vub_req_complete(VubReq *req) 101 { 102 VugDev *gdev = &req->vdev_blk->parent; 103 VuDev *vu_dev = &gdev->parent; 104 105 /* IO size with 1 extra status byte */ 106 vu_queue_push(vu_dev, req->vq, req->elem, 107 req->size + 1); 108 vu_queue_notify(vu_dev, req->vq); 109 110 if (req->elem) { 111 free(req->elem); 112 } 113 114 g_free(req); 115 } 116 117 static int vub_open(const char *file_name, bool wce) 118 { 119 int fd; 120 int flags = O_RDWR; 121 122 if (!wce) { 123 flags |= O_DIRECT; 124 } 125 126 fd = open(file_name, flags); 127 if (fd < 0) { 128 fprintf(stderr, "Cannot open file %s, %s\n", file_name, 129 strerror(errno)); 130 return -1; 131 } 132 133 return fd; 134 } 135 136 static ssize_t 137 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) 138 { 139 VubDev *vdev_blk = req->vdev_blk; 140 ssize_t rc; 141 142 if (!iovcnt) { 143 fprintf(stderr, "Invalid Read IOV count\n"); 144 return -1; 145 } 146 147 req->size = vub_iov_size(iov, iovcnt); 148 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 149 if (rc < 0) { 150 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 151 vdev_blk->blk_name, req->sector_num, req->size, 152 strerror(errno)); 153 return -1; 154 } 155 156 return rc; 157 } 158 159 static ssize_t 160 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) 161 { 162 VubDev *vdev_blk = req->vdev_blk; 163 ssize_t rc; 164 165 if (!iovcnt) { 166 fprintf(stderr, "Invalid Write IOV count\n"); 167 return -1; 168 } 169 170 req->size = vub_iov_size(iov, iovcnt); 171 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 172 if (rc < 0) { 173 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 174 vdev_blk->blk_name, req->sector_num, req->size, 175 strerror(errno)); 176 return -1; 177 } 178 179 return rc; 180 } 181 182 static int 183 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, 184 uint32_t type) 185 { 186 struct virtio_blk_discard_write_zeroes *desc; 187 ssize_t size; 188 void *buf; 189 190 size = vub_iov_size(iov, iovcnt); 191 if (size != sizeof(*desc)) { 192 fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc)); 193 return -1; 194 } 195 buf = g_new0(char, size); 196 vub_iov_to_buf(iov, iovcnt, buf); 197 198 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 199 VubDev *vdev_blk = req->vdev_blk; 200 desc = (struct virtio_blk_discard_write_zeroes *)buf; 201 uint64_t range[2] = { le64toh(desc->sector) << 9, 202 le32toh(desc->num_sectors) << 9 }; 203 if (type == VIRTIO_BLK_T_DISCARD) { 204 if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { 205 g_free(buf); 206 return 0; 207 } 208 } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 209 if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) { 210 g_free(buf); 211 return 0; 212 } 213 } 214 #endif 215 216 g_free(buf); 217 return -1; 218 } 219 220 static void 221 vub_flush(VubReq *req) 222 { 223 VubDev *vdev_blk = req->vdev_blk; 224 225 fdatasync(vdev_blk->blk_fd); 226 } 227 228 static int vub_virtio_process_req(VubDev *vdev_blk, 229 VuVirtq *vq) 230 { 231 VugDev *gdev = &vdev_blk->parent; 232 VuDev *vu_dev = &gdev->parent; 233 VuVirtqElement *elem; 234 uint32_t type; 235 unsigned in_num; 236 unsigned out_num; 237 VubReq *req; 238 239 elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); 240 if (!elem) { 241 return -1; 242 } 243 244 /* refer to hw/block/virtio_blk.c */ 245 if (elem->out_num < 1 || elem->in_num < 1) { 246 fprintf(stderr, "virtio-blk request missing headers\n"); 247 free(elem); 248 return -1; 249 } 250 251 req = g_new0(VubReq, 1); 252 req->vdev_blk = vdev_blk; 253 req->vq = vq; 254 req->elem = elem; 255 256 in_num = elem->in_num; 257 out_num = elem->out_num; 258 259 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */ 260 if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) { 261 fprintf(stderr, "Invalid outhdr size\n"); 262 goto err; 263 } 264 req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base; 265 out_num--; 266 267 if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 268 fprintf(stderr, "Invalid inhdr size\n"); 269 goto err; 270 } 271 req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; 272 in_num--; 273 274 type = le32toh(req->out->type); 275 switch (type & ~VIRTIO_BLK_T_BARRIER) { 276 case VIRTIO_BLK_T_IN: 277 case VIRTIO_BLK_T_OUT: { 278 ssize_t ret = 0; 279 bool is_write = type & VIRTIO_BLK_T_OUT; 280 req->sector_num = le64toh(req->out->sector); 281 if (is_write) { 282 ret = vub_writev(req, &elem->out_sg[1], out_num); 283 } else { 284 ret = vub_readv(req, &elem->in_sg[0], in_num); 285 } 286 if (ret >= 0) { 287 req->in->status = VIRTIO_BLK_S_OK; 288 } else { 289 req->in->status = VIRTIO_BLK_S_IOERR; 290 } 291 vub_req_complete(req); 292 break; 293 } 294 case VIRTIO_BLK_T_FLUSH: 295 vub_flush(req); 296 req->in->status = VIRTIO_BLK_S_OK; 297 vub_req_complete(req); 298 break; 299 case VIRTIO_BLK_T_GET_ID: { 300 size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 301 VIRTIO_BLK_ID_BYTES); 302 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 303 req->in->status = VIRTIO_BLK_S_OK; 304 req->size = elem->in_sg[0].iov_len; 305 vub_req_complete(req); 306 break; 307 } 308 case VIRTIO_BLK_T_DISCARD: 309 case VIRTIO_BLK_T_WRITE_ZEROES: { 310 int rc; 311 rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type); 312 if (rc == 0) { 313 req->in->status = VIRTIO_BLK_S_OK; 314 } else { 315 req->in->status = VIRTIO_BLK_S_IOERR; 316 } 317 vub_req_complete(req); 318 break; 319 } 320 default: 321 req->in->status = VIRTIO_BLK_S_UNSUPP; 322 vub_req_complete(req); 323 break; 324 } 325 326 return 0; 327 328 err: 329 free(elem); 330 g_free(req); 331 return -1; 332 } 333 334 static void vub_process_vq(VuDev *vu_dev, int idx) 335 { 336 VugDev *gdev; 337 VubDev *vdev_blk; 338 VuVirtq *vq; 339 int ret; 340 341 gdev = container_of(vu_dev, VugDev, parent); 342 vdev_blk = container_of(gdev, VubDev, parent); 343 assert(vdev_blk); 344 345 vq = vu_get_queue(vu_dev, idx); 346 assert(vq); 347 348 while (1) { 349 ret = vub_virtio_process_req(vdev_blk, vq); 350 if (ret) { 351 break; 352 } 353 } 354 } 355 356 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started) 357 { 358 VuVirtq *vq; 359 360 assert(vu_dev); 361 362 vq = vu_get_queue(vu_dev, idx); 363 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL); 364 } 365 366 static uint64_t 367 vub_get_features(VuDev *dev) 368 { 369 uint64_t features; 370 VugDev *gdev; 371 VubDev *vdev_blk; 372 373 gdev = container_of(dev, VugDev, parent); 374 vdev_blk = container_of(gdev, VubDev, parent); 375 376 features = 1ull << VIRTIO_BLK_F_SIZE_MAX | 377 1ull << VIRTIO_BLK_F_SEG_MAX | 378 1ull << VIRTIO_BLK_F_TOPOLOGY | 379 1ull << VIRTIO_BLK_F_BLK_SIZE | 380 1ull << VIRTIO_BLK_F_FLUSH | 381 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 382 1ull << VIRTIO_BLK_F_DISCARD | 383 1ull << VIRTIO_BLK_F_WRITE_ZEROES | 384 #endif 385 1ull << VIRTIO_BLK_F_CONFIG_WCE; 386 387 if (vdev_blk->enable_ro) { 388 features |= 1ull << VIRTIO_BLK_F_RO; 389 } 390 391 return features; 392 } 393 394 static uint64_t 395 vub_get_protocol_features(VuDev *dev) 396 { 397 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG | 398 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; 399 } 400 401 static int 402 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) 403 { 404 VugDev *gdev; 405 VubDev *vdev_blk; 406 407 g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1); 408 409 gdev = container_of(vu_dev, VugDev, parent); 410 vdev_blk = container_of(gdev, VubDev, parent); 411 memcpy(config, &vdev_blk->blkcfg, len); 412 413 return 0; 414 } 415 416 static int 417 vub_set_config(VuDev *vu_dev, const uint8_t *data, 418 uint32_t offset, uint32_t size, uint32_t flags) 419 { 420 VugDev *gdev; 421 VubDev *vdev_blk; 422 uint8_t wce; 423 int fd; 424 425 /* don't support live migration */ 426 if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { 427 return -1; 428 } 429 430 gdev = container_of(vu_dev, VugDev, parent); 431 vdev_blk = container_of(gdev, VubDev, parent); 432 433 if (offset != offsetof(struct virtio_blk_config, wce) || 434 size != 1) { 435 return -1; 436 } 437 438 wce = *data; 439 if (wce == vdev_blk->blkcfg.wce) { 440 /* Do nothing as same with old configuration */ 441 return 0; 442 } 443 444 vdev_blk->blkcfg.wce = wce; 445 fprintf(stdout, "Write Cache Policy Changed\n"); 446 if (vdev_blk->blk_fd >= 0) { 447 close(vdev_blk->blk_fd); 448 vdev_blk->blk_fd = -1; 449 } 450 451 fd = vub_open(vdev_blk->blk_name, wce); 452 if (fd < 0) { 453 fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name); 454 vdev_blk->blk_fd = -1; 455 return -1; 456 } 457 vdev_blk->blk_fd = fd; 458 459 return 0; 460 } 461 462 static const VuDevIface vub_iface = { 463 .get_features = vub_get_features, 464 .queue_set_started = vub_queue_set_started, 465 .get_protocol_features = vub_get_protocol_features, 466 .get_config = vub_get_config, 467 .set_config = vub_set_config, 468 }; 469 470 static int unix_sock_new(char *unix_fn) 471 { 472 int sock; 473 struct sockaddr_un un; 474 size_t len; 475 476 assert(unix_fn); 477 478 sock = socket(AF_UNIX, SOCK_STREAM, 0); 479 if (sock <= 0) { 480 perror("socket"); 481 return -1; 482 } 483 484 un.sun_family = AF_UNIX; 485 (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); 486 len = sizeof(un.sun_family) + strlen(un.sun_path); 487 488 (void)unlink(unix_fn); 489 if (bind(sock, (struct sockaddr *)&un, len) < 0) { 490 perror("bind"); 491 goto fail; 492 } 493 494 if (listen(sock, 1) < 0) { 495 perror("listen"); 496 goto fail; 497 } 498 499 return sock; 500 501 fail: 502 (void)close(sock); 503 504 return -1; 505 } 506 507 static void vub_free(struct VubDev *vdev_blk) 508 { 509 if (!vdev_blk) { 510 return; 511 } 512 513 g_main_loop_unref(vdev_blk->loop); 514 if (vdev_blk->blk_fd >= 0) { 515 close(vdev_blk->blk_fd); 516 } 517 g_free(vdev_blk); 518 } 519 520 static uint32_t 521 vub_get_blocksize(int fd) 522 { 523 uint32_t blocksize = 512; 524 525 #if defined(__linux__) && defined(BLKSSZGET) 526 if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { 527 return blocksize; 528 } 529 #endif 530 531 return blocksize; 532 } 533 534 static void 535 vub_initialize_config(int fd, struct virtio_blk_config *config) 536 { 537 off64_t capacity; 538 539 capacity = lseek64(fd, 0, SEEK_END); 540 config->capacity = capacity >> 9; 541 config->blk_size = vub_get_blocksize(fd); 542 config->size_max = 65536; 543 config->seg_max = 128 - 2; 544 config->min_io_size = 1; 545 config->opt_io_size = 1; 546 config->num_queues = 1; 547 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 548 config->max_discard_sectors = 32768; 549 config->max_discard_seg = 1; 550 config->discard_sector_alignment = config->blk_size >> 9; 551 config->max_write_zeroes_sectors = 32768; 552 config->max_write_zeroes_seg = 1; 553 #endif 554 } 555 556 static VubDev * 557 vub_new(char *blk_file) 558 { 559 VubDev *vdev_blk; 560 561 vdev_blk = g_new0(VubDev, 1); 562 vdev_blk->loop = g_main_loop_new(NULL, FALSE); 563 vdev_blk->blk_fd = vub_open(blk_file, 0); 564 if (vdev_blk->blk_fd < 0) { 565 fprintf(stderr, "Error to open block device %s\n", blk_file); 566 vub_free(vdev_blk); 567 return NULL; 568 } 569 vdev_blk->enable_ro = false; 570 vdev_blk->blkcfg.wce = 0; 571 vdev_blk->blk_name = blk_file; 572 573 /* fill virtio_blk_config with block parameters */ 574 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg); 575 576 return vdev_blk; 577 } 578 579 static int opt_fdnum = -1; 580 static char *opt_socket_path; 581 static char *opt_blk_file; 582 static gboolean opt_print_caps; 583 static gboolean opt_read_only; 584 585 static GOptionEntry entries[] = { 586 { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, 587 "Print capabilities", NULL }, 588 { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, 589 "Use inherited fd socket", "FDNUM" }, 590 { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, 591 "Use UNIX socket path", "PATH" }, 592 {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file, 593 "block device or file path", "PATH"}, 594 { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only, 595 "Enable read-only", NULL } 596 }; 597 598 int main(int argc, char **argv) 599 { 600 int lsock = -1, csock = -1; 601 VubDev *vdev_blk = NULL; 602 GError *error = NULL; 603 GOptionContext *context; 604 605 context = g_option_context_new(NULL); 606 g_option_context_add_main_entries(context, entries, NULL); 607 if (!g_option_context_parse(context, &argc, &argv, &error)) { 608 g_printerr("Option parsing failed: %s\n", error->message); 609 exit(EXIT_FAILURE); 610 } 611 if (opt_print_caps) { 612 g_print("{\n"); 613 g_print(" \"type\": \"block\",\n"); 614 g_print(" \"features\": [\n"); 615 g_print(" \"read-only\",\n"); 616 g_print(" \"blk-file\"\n"); 617 g_print(" ]\n"); 618 g_print("}\n"); 619 exit(EXIT_SUCCESS); 620 } 621 622 if (!opt_blk_file) { 623 g_print("%s\n", g_option_context_get_help(context, true, NULL)); 624 exit(EXIT_FAILURE); 625 } 626 627 if (opt_socket_path) { 628 lsock = unix_sock_new(opt_socket_path); 629 if (lsock < 0) { 630 exit(EXIT_FAILURE); 631 } 632 } else if (opt_fdnum < 0) { 633 g_print("%s\n", g_option_context_get_help(context, true, NULL)); 634 exit(EXIT_FAILURE); 635 } else { 636 lsock = opt_fdnum; 637 } 638 639 csock = accept(lsock, NULL, NULL); 640 if (csock < 0) { 641 g_printerr("Accept error %s\n", strerror(errno)); 642 exit(EXIT_FAILURE); 643 } 644 645 vdev_blk = vub_new(opt_blk_file); 646 if (!vdev_blk) { 647 exit(EXIT_FAILURE); 648 } 649 if (opt_read_only) { 650 vdev_blk->enable_ro = true; 651 } 652 653 if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock, 654 vub_panic_cb, &vub_iface)) { 655 g_printerr("Failed to initialize libvhost-user-glib\n"); 656 exit(EXIT_FAILURE); 657 } 658 659 g_main_loop_run(vdev_blk->loop); 660 g_main_loop_unref(vdev_blk->loop); 661 g_option_context_free(context); 662 vug_deinit(&vdev_blk->parent); 663 vub_free(vdev_blk); 664 if (csock >= 0) { 665 close(csock); 666 } 667 if (lsock >= 0) { 668 close(lsock); 669 } 670 g_free(opt_socket_path); 671 g_free(opt_blk_file); 672 673 return 0; 674 } 675