1 /* 2 * vhost-user-blk sample application 3 * 4 * Copyright (c) 2017 Intel Corporation. All rights reserved. 5 * 6 * Author: 7 * Changpeng Liu <changpeng.liu@intel.com> 8 * 9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver 10 * implementation by: 11 * Felipe Franciosi <felipe@nutanix.com> 12 * Anthony Liguori <aliguori@us.ibm.com> 13 * 14 * This work is licensed under the terms of the GNU GPL, version 2 only. 15 * See the COPYING file in the top-level directory. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "qemu/bswap.h" 20 #include "standard-headers/linux/virtio_blk.h" 21 #include "libvhost-user-glib.h" 22 23 #if defined(__linux__) 24 #include <linux/fs.h> 25 #include <sys/ioctl.h> 26 #endif 27 28 enum { 29 VHOST_USER_BLK_MAX_QUEUES = 8, 30 }; 31 32 struct virtio_blk_inhdr { 33 unsigned char status; 34 }; 35 36 /* vhost user block device */ 37 typedef struct VubDev { 38 VugDev parent; 39 int blk_fd; 40 struct virtio_blk_config blkcfg; 41 bool enable_ro; 42 char *blk_name; 43 GMainLoop *loop; 44 } VubDev; 45 46 typedef struct VubReq { 47 VuVirtqElement *elem; 48 int64_t sector_num; 49 size_t size; 50 struct virtio_blk_inhdr *in; 51 struct virtio_blk_outhdr *out; 52 VubDev *vdev_blk; 53 struct VuVirtq *vq; 54 } VubReq; 55 56 /* refer util/iov.c */ 57 static size_t vub_iov_size(const struct iovec *iov, 58 const unsigned int iov_cnt) 59 { 60 size_t len; 61 unsigned int i; 62 63 len = 0; 64 for (i = 0; i < iov_cnt; i++) { 65 len += iov[i].iov_len; 66 } 67 return len; 68 } 69 70 static size_t vub_iov_to_buf(const struct iovec *iov, 71 const unsigned int iov_cnt, void *buf) 72 { 73 size_t len; 74 unsigned int i; 75 76 len = 0; 77 for (i = 0; i < iov_cnt; i++) { 78 memcpy(buf + len, iov[i].iov_base, iov[i].iov_len); 79 len += iov[i].iov_len; 80 } 81 return len; 82 } 83 84 static void vub_panic_cb(VuDev *vu_dev, const char *buf) 85 { 86 VugDev *gdev; 87 VubDev *vdev_blk; 88 89 assert(vu_dev); 90 91 gdev = container_of(vu_dev, VugDev, parent); 92 vdev_blk = container_of(gdev, VubDev, parent); 93 if (buf) { 94 g_warning("vu_panic: %s", buf); 95 } 96 97 g_main_loop_quit(vdev_blk->loop); 98 } 99 100 static void vub_req_complete(VubReq *req) 101 { 102 VugDev *gdev = &req->vdev_blk->parent; 103 VuDev *vu_dev = &gdev->parent; 104 105 /* IO size with 1 extra status byte */ 106 vu_queue_push(vu_dev, req->vq, req->elem, 107 req->size + 1); 108 vu_queue_notify(vu_dev, req->vq); 109 110 g_free(req->elem); 111 g_free(req); 112 } 113 114 static int vub_open(const char *file_name, bool wce) 115 { 116 int fd; 117 int flags = O_RDWR; 118 119 if (!wce) { 120 flags |= O_DIRECT; 121 } 122 123 fd = open(file_name, flags); 124 if (fd < 0) { 125 fprintf(stderr, "Cannot open file %s, %s\n", file_name, 126 strerror(errno)); 127 return -1; 128 } 129 130 return fd; 131 } 132 133 static ssize_t 134 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) 135 { 136 VubDev *vdev_blk = req->vdev_blk; 137 ssize_t rc; 138 139 if (!iovcnt) { 140 fprintf(stderr, "Invalid Read IOV count\n"); 141 return -1; 142 } 143 144 req->size = vub_iov_size(iov, iovcnt); 145 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 146 if (rc < 0) { 147 fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n", 148 vdev_blk->blk_name, req->sector_num, req->size, 149 strerror(errno)); 150 return -1; 151 } 152 153 return rc; 154 } 155 156 static ssize_t 157 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) 158 { 159 VubDev *vdev_blk = req->vdev_blk; 160 ssize_t rc; 161 162 if (!iovcnt) { 163 fprintf(stderr, "Invalid Write IOV count\n"); 164 return -1; 165 } 166 167 req->size = vub_iov_size(iov, iovcnt); 168 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 169 if (rc < 0) { 170 fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n", 171 vdev_blk->blk_name, req->sector_num, req->size, 172 strerror(errno)); 173 return -1; 174 } 175 176 return rc; 177 } 178 179 static int 180 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, 181 uint32_t type) 182 { 183 struct virtio_blk_discard_write_zeroes *desc; 184 ssize_t size; 185 void *buf; 186 187 size = vub_iov_size(iov, iovcnt); 188 if (size != sizeof(*desc)) { 189 fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc)); 190 return -1; 191 } 192 buf = g_new0(char, size); 193 vub_iov_to_buf(iov, iovcnt, buf); 194 195 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 196 VubDev *vdev_blk = req->vdev_blk; 197 desc = buf; 198 uint64_t range[2] = { le64_to_cpu(desc->sector) << 9, 199 le32_to_cpu(desc->num_sectors) << 9 }; 200 if (type == VIRTIO_BLK_T_DISCARD) { 201 if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { 202 g_free(buf); 203 return 0; 204 } 205 } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 206 if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) { 207 g_free(buf); 208 return 0; 209 } 210 } 211 #endif 212 213 g_free(buf); 214 return -1; 215 } 216 217 static void 218 vub_flush(VubReq *req) 219 { 220 VubDev *vdev_blk = req->vdev_blk; 221 222 fdatasync(vdev_blk->blk_fd); 223 } 224 225 static int vub_virtio_process_req(VubDev *vdev_blk, 226 VuVirtq *vq) 227 { 228 VugDev *gdev = &vdev_blk->parent; 229 VuDev *vu_dev = &gdev->parent; 230 VuVirtqElement *elem; 231 uint32_t type; 232 unsigned in_num; 233 unsigned out_num; 234 VubReq *req; 235 236 elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); 237 if (!elem) { 238 return -1; 239 } 240 241 /* refer to hw/block/virtio_blk.c */ 242 if (elem->out_num < 1 || elem->in_num < 1) { 243 fprintf(stderr, "virtio-blk request missing headers\n"); 244 g_free(elem); 245 return -1; 246 } 247 248 req = g_new0(VubReq, 1); 249 req->vdev_blk = vdev_blk; 250 req->vq = vq; 251 req->elem = elem; 252 253 in_num = elem->in_num; 254 out_num = elem->out_num; 255 256 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */ 257 if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) { 258 fprintf(stderr, "Invalid outhdr size\n"); 259 goto err; 260 } 261 req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base; 262 out_num--; 263 264 if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 265 fprintf(stderr, "Invalid inhdr size\n"); 266 goto err; 267 } 268 req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; 269 in_num--; 270 271 type = le32_to_cpu(req->out->type); 272 switch (type & ~VIRTIO_BLK_T_BARRIER) { 273 case VIRTIO_BLK_T_IN: 274 case VIRTIO_BLK_T_OUT: { 275 ssize_t ret = 0; 276 bool is_write = type & VIRTIO_BLK_T_OUT; 277 req->sector_num = le64_to_cpu(req->out->sector); 278 if (is_write) { 279 ret = vub_writev(req, &elem->out_sg[1], out_num); 280 } else { 281 ret = vub_readv(req, &elem->in_sg[0], in_num); 282 } 283 if (ret >= 0) { 284 req->in->status = VIRTIO_BLK_S_OK; 285 } else { 286 req->in->status = VIRTIO_BLK_S_IOERR; 287 } 288 vub_req_complete(req); 289 break; 290 } 291 case VIRTIO_BLK_T_FLUSH: 292 vub_flush(req); 293 req->in->status = VIRTIO_BLK_S_OK; 294 vub_req_complete(req); 295 break; 296 case VIRTIO_BLK_T_GET_ID: { 297 size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 298 VIRTIO_BLK_ID_BYTES); 299 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 300 req->in->status = VIRTIO_BLK_S_OK; 301 req->size = elem->in_sg[0].iov_len; 302 vub_req_complete(req); 303 break; 304 } 305 case VIRTIO_BLK_T_DISCARD: 306 case VIRTIO_BLK_T_WRITE_ZEROES: { 307 int rc; 308 rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type); 309 if (rc == 0) { 310 req->in->status = VIRTIO_BLK_S_OK; 311 } else { 312 req->in->status = VIRTIO_BLK_S_IOERR; 313 } 314 vub_req_complete(req); 315 break; 316 } 317 default: 318 req->in->status = VIRTIO_BLK_S_UNSUPP; 319 vub_req_complete(req); 320 break; 321 } 322 323 return 0; 324 325 err: 326 g_free(elem); 327 g_free(req); 328 return -1; 329 } 330 331 static void vub_process_vq(VuDev *vu_dev, int idx) 332 { 333 VugDev *gdev; 334 VubDev *vdev_blk; 335 VuVirtq *vq; 336 int ret; 337 338 gdev = container_of(vu_dev, VugDev, parent); 339 vdev_blk = container_of(gdev, VubDev, parent); 340 assert(vdev_blk); 341 342 vq = vu_get_queue(vu_dev, idx); 343 assert(vq); 344 345 while (1) { 346 ret = vub_virtio_process_req(vdev_blk, vq); 347 if (ret) { 348 break; 349 } 350 } 351 } 352 353 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started) 354 { 355 VuVirtq *vq; 356 357 assert(vu_dev); 358 359 vq = vu_get_queue(vu_dev, idx); 360 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL); 361 } 362 363 static uint64_t 364 vub_get_features(VuDev *dev) 365 { 366 uint64_t features; 367 VugDev *gdev; 368 VubDev *vdev_blk; 369 370 gdev = container_of(dev, VugDev, parent); 371 vdev_blk = container_of(gdev, VubDev, parent); 372 373 features = 1ull << VIRTIO_BLK_F_SIZE_MAX | 374 1ull << VIRTIO_BLK_F_SEG_MAX | 375 1ull << VIRTIO_BLK_F_TOPOLOGY | 376 1ull << VIRTIO_BLK_F_BLK_SIZE | 377 1ull << VIRTIO_BLK_F_FLUSH | 378 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 379 1ull << VIRTIO_BLK_F_DISCARD | 380 1ull << VIRTIO_BLK_F_WRITE_ZEROES | 381 #endif 382 1ull << VIRTIO_BLK_F_CONFIG_WCE; 383 384 if (vdev_blk->enable_ro) { 385 features |= 1ull << VIRTIO_BLK_F_RO; 386 } 387 388 return features; 389 } 390 391 static uint64_t 392 vub_get_protocol_features(VuDev *dev) 393 { 394 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG | 395 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD; 396 } 397 398 static int 399 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) 400 { 401 VugDev *gdev; 402 VubDev *vdev_blk; 403 404 if (len > sizeof(struct virtio_blk_config)) { 405 return -1; 406 } 407 408 gdev = container_of(vu_dev, VugDev, parent); 409 vdev_blk = container_of(gdev, VubDev, parent); 410 memcpy(config, &vdev_blk->blkcfg, len); 411 412 return 0; 413 } 414 415 static int 416 vub_set_config(VuDev *vu_dev, const uint8_t *data, 417 uint32_t offset, uint32_t size, uint32_t flags) 418 { 419 VugDev *gdev; 420 VubDev *vdev_blk; 421 uint8_t wce; 422 int fd; 423 424 /* don't support live migration */ 425 if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) { 426 return -1; 427 } 428 429 gdev = container_of(vu_dev, VugDev, parent); 430 vdev_blk = container_of(gdev, VubDev, parent); 431 432 if (offset != offsetof(struct virtio_blk_config, wce) || 433 size != 1) { 434 return -1; 435 } 436 437 wce = *data; 438 if (wce == vdev_blk->blkcfg.wce) { 439 /* Do nothing as same with old configuration */ 440 return 0; 441 } 442 443 vdev_blk->blkcfg.wce = wce; 444 fprintf(stdout, "Write Cache Policy Changed\n"); 445 if (vdev_blk->blk_fd >= 0) { 446 close(vdev_blk->blk_fd); 447 vdev_blk->blk_fd = -1; 448 } 449 450 fd = vub_open(vdev_blk->blk_name, wce); 451 if (fd < 0) { 452 fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name); 453 vdev_blk->blk_fd = -1; 454 return -1; 455 } 456 vdev_blk->blk_fd = fd; 457 458 return 0; 459 } 460 461 static const VuDevIface vub_iface = { 462 .get_features = vub_get_features, 463 .queue_set_started = vub_queue_set_started, 464 .get_protocol_features = vub_get_protocol_features, 465 .get_config = vub_get_config, 466 .set_config = vub_set_config, 467 }; 468 469 static int unix_sock_new(char *unix_fn) 470 { 471 int sock; 472 struct sockaddr_un un; 473 474 assert(unix_fn); 475 476 sock = socket(AF_UNIX, SOCK_STREAM, 0); 477 if (sock < 0) { 478 perror("socket"); 479 return -1; 480 } 481 482 un.sun_family = AF_UNIX; 483 (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); 484 485 (void)unlink(unix_fn); 486 if (bind(sock, (struct sockaddr *)&un, sizeof(un)) < 0) { 487 perror("bind"); 488 goto fail; 489 } 490 491 if (listen(sock, 1) < 0) { 492 perror("listen"); 493 goto fail; 494 } 495 496 return sock; 497 498 fail: 499 (void)close(sock); 500 501 return -1; 502 } 503 504 static void vub_free(struct VubDev *vdev_blk) 505 { 506 if (!vdev_blk) { 507 return; 508 } 509 510 g_main_loop_unref(vdev_blk->loop); 511 if (vdev_blk->blk_fd >= 0) { 512 close(vdev_blk->blk_fd); 513 } 514 g_free(vdev_blk); 515 } 516 517 static uint32_t 518 vub_get_blocksize(int fd) 519 { 520 uint32_t blocksize = 512; 521 522 #if defined(__linux__) && defined(BLKSSZGET) 523 if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { 524 return blocksize; 525 } 526 #endif 527 528 return blocksize; 529 } 530 531 static void 532 vub_initialize_config(int fd, struct virtio_blk_config *config) 533 { 534 off_t capacity; 535 536 capacity = lseek(fd, 0, SEEK_END); 537 config->capacity = capacity >> 9; 538 config->blk_size = vub_get_blocksize(fd); 539 config->size_max = 65536; 540 config->seg_max = 128 - 2; 541 config->min_io_size = 1; 542 config->opt_io_size = 1; 543 config->num_queues = 1; 544 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 545 config->max_discard_sectors = 32768; 546 config->max_discard_seg = 1; 547 config->discard_sector_alignment = config->blk_size >> 9; 548 config->max_write_zeroes_sectors = 32768; 549 config->max_write_zeroes_seg = 1; 550 #endif 551 } 552 553 static VubDev * 554 vub_new(char *blk_file) 555 { 556 VubDev *vdev_blk; 557 558 vdev_blk = g_new0(VubDev, 1); 559 vdev_blk->loop = g_main_loop_new(NULL, FALSE); 560 vdev_blk->blk_fd = vub_open(blk_file, 0); 561 if (vdev_blk->blk_fd < 0) { 562 fprintf(stderr, "Error to open block device %s\n", blk_file); 563 vub_free(vdev_blk); 564 return NULL; 565 } 566 vdev_blk->enable_ro = false; 567 vdev_blk->blkcfg.wce = 0; 568 vdev_blk->blk_name = blk_file; 569 570 /* fill virtio_blk_config with block parameters */ 571 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg); 572 573 return vdev_blk; 574 } 575 576 static int opt_fdnum = -1; 577 static char *opt_socket_path; 578 static char *opt_blk_file; 579 static gboolean opt_print_caps; 580 static gboolean opt_read_only; 581 582 static GOptionEntry entries[] = { 583 { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, 584 "Print capabilities", NULL }, 585 { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, 586 "Use inherited fd socket", "FDNUM" }, 587 { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, 588 "Use UNIX socket path", "PATH" }, 589 {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file, 590 "block device or file path", "PATH"}, 591 { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only, 592 "Enable read-only", NULL }, 593 { NULL, }, 594 }; 595 596 int main(int argc, char **argv) 597 { 598 int lsock = -1, csock = -1; 599 VubDev *vdev_blk = NULL; 600 GError *error = NULL; 601 GOptionContext *context; 602 603 context = g_option_context_new(NULL); 604 g_option_context_add_main_entries(context, entries, NULL); 605 if (!g_option_context_parse(context, &argc, &argv, &error)) { 606 g_printerr("Option parsing failed: %s\n", error->message); 607 exit(EXIT_FAILURE); 608 } 609 if (opt_print_caps) { 610 g_print("{\n"); 611 g_print(" \"type\": \"block\",\n"); 612 g_print(" \"features\": [\n"); 613 g_print(" \"read-only\",\n"); 614 g_print(" \"blk-file\"\n"); 615 g_print(" ]\n"); 616 g_print("}\n"); 617 exit(EXIT_SUCCESS); 618 } 619 620 if (!opt_blk_file) { 621 g_print("%s\n", g_option_context_get_help(context, true, NULL)); 622 exit(EXIT_FAILURE); 623 } 624 625 if (opt_socket_path) { 626 lsock = unix_sock_new(opt_socket_path); 627 if (lsock < 0) { 628 exit(EXIT_FAILURE); 629 } 630 } else if (opt_fdnum < 0) { 631 g_print("%s\n", g_option_context_get_help(context, true, NULL)); 632 exit(EXIT_FAILURE); 633 } else { 634 lsock = opt_fdnum; 635 } 636 637 csock = accept(lsock, NULL, NULL); 638 if (csock < 0) { 639 g_printerr("Accept error %s\n", strerror(errno)); 640 exit(EXIT_FAILURE); 641 } 642 643 vdev_blk = vub_new(opt_blk_file); 644 if (!vdev_blk) { 645 exit(EXIT_FAILURE); 646 } 647 if (opt_read_only) { 648 vdev_blk->enable_ro = true; 649 } 650 651 if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock, 652 vub_panic_cb, &vub_iface)) { 653 g_printerr("Failed to initialize libvhost-user-glib\n"); 654 exit(EXIT_FAILURE); 655 } 656 657 g_main_loop_run(vdev_blk->loop); 658 g_main_loop_unref(vdev_blk->loop); 659 g_option_context_free(context); 660 vug_deinit(&vdev_blk->parent); 661 vub_free(vdev_blk); 662 if (csock >= 0) { 663 close(csock); 664 } 665 if (lsock >= 0) { 666 close(lsock); 667 } 668 g_free(opt_socket_path); 669 g_free(opt_blk_file); 670 671 return 0; 672 } 673