1 /* 2 * vhost-user-blk sample application 3 * 4 * Copyright (c) 2017 Intel Corporation. All rights reserved. 5 * 6 * Author: 7 * Changpeng Liu <changpeng.liu@intel.com> 8 * 9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver 10 * implementation by: 11 * Felipe Franciosi <felipe@nutanix.com> 12 * Anthony Liguori <aliguori@us.ibm.com> 13 * 14 * This work is licensed under the terms of the GNU GPL, version 2 only. 15 * See the COPYING file in the top-level directory. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "standard-headers/linux/virtio_blk.h" 20 #include "contrib/libvhost-user/libvhost-user-glib.h" 21 #include "contrib/libvhost-user/libvhost-user.h" 22 23 #if defined(__linux__) 24 #include <linux/fs.h> 25 #include <sys/ioctl.h> 26 #endif 27 28 struct virtio_blk_inhdr { 29 unsigned char status; 30 }; 31 32 /* vhost user block device */ 33 typedef struct VubDev { 34 VugDev parent; 35 int blk_fd; 36 struct virtio_blk_config blkcfg; 37 bool enable_ro; 38 char *blk_name; 39 GMainLoop *loop; 40 } VubDev; 41 42 typedef struct VubReq { 43 VuVirtqElement *elem; 44 int64_t sector_num; 45 size_t size; 46 struct virtio_blk_inhdr *in; 47 struct virtio_blk_outhdr *out; 48 VubDev *vdev_blk; 49 struct VuVirtq *vq; 50 } VubReq; 51 52 /* refer util/iov.c */ 53 static size_t vub_iov_size(const struct iovec *iov, 54 const unsigned int iov_cnt) 55 { 56 size_t len; 57 unsigned int i; 58 59 len = 0; 60 for (i = 0; i < iov_cnt; i++) { 61 len += iov[i].iov_len; 62 } 63 return len; 64 } 65 66 static size_t vub_iov_to_buf(const struct iovec *iov, 67 const unsigned int iov_cnt, void *buf) 68 { 69 size_t len; 70 unsigned int i; 71 72 len = 0; 73 for (i = 0; i < iov_cnt; i++) { 74 memcpy(buf + len, iov[i].iov_base, iov[i].iov_len); 75 len += iov[i].iov_len; 76 } 77 return len; 78 } 79 80 static void vub_panic_cb(VuDev *vu_dev, const char *buf) 81 { 82 VugDev *gdev; 83 VubDev *vdev_blk; 84 85 assert(vu_dev); 86 87 gdev = container_of(vu_dev, VugDev, parent); 88 vdev_blk = container_of(gdev, VubDev, parent); 89 if (buf) { 90 g_warning("vu_panic: %s", buf); 91 } 92 93 g_main_loop_quit(vdev_blk->loop); 94 } 95 96 static void vub_req_complete(VubReq *req) 97 { 98 VugDev *gdev = &req->vdev_blk->parent; 99 VuDev *vu_dev = &gdev->parent; 100 101 /* IO size with 1 extra status byte */ 102 vu_queue_push(vu_dev, req->vq, req->elem, 103 req->size + 1); 104 vu_queue_notify(vu_dev, req->vq); 105 106 if (req->elem) { 107 free(req->elem); 108 } 109 110 g_free(req); 111 } 112 113 static int vub_open(const char *file_name, bool wce) 114 { 115 int fd; 116 int flags = O_RDWR; 117 118 if (!wce) { 119 flags |= O_DIRECT; 120 } 121 122 fd = open(file_name, flags); 123 if (fd < 0) { 124 fprintf(stderr, "Cannot open file %s, %s\n", file_name, 125 strerror(errno)); 126 return -1; 127 } 128 129 return fd; 130 } 131 132 static ssize_t 133 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) 134 { 135 VubDev *vdev_blk = req->vdev_blk; 136 ssize_t rc; 137 138 if (!iovcnt) { 139 fprintf(stderr, "Invalid Read IOV count\n"); 140 return -1; 141 } 142 143 req->size = vub_iov_size(iov, iovcnt); 144 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 145 if (rc < 0) { 146 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 147 vdev_blk->blk_name, req->sector_num, req->size, 148 strerror(errno)); 149 return -1; 150 } 151 152 return rc; 153 } 154 155 static ssize_t 156 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) 157 { 158 VubDev *vdev_blk = req->vdev_blk; 159 ssize_t rc; 160 161 if (!iovcnt) { 162 fprintf(stderr, "Invalid Write IOV count\n"); 163 return -1; 164 } 165 166 req->size = vub_iov_size(iov, iovcnt); 167 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 168 if (rc < 0) { 169 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 170 vdev_blk->blk_name, req->sector_num, req->size, 171 strerror(errno)); 172 return -1; 173 } 174 175 return rc; 176 } 177 178 static int 179 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, 180 uint32_t type) 181 { 182 struct virtio_blk_discard_write_zeroes *desc; 183 ssize_t size; 184 void *buf; 185 186 size = vub_iov_size(iov, iovcnt); 187 if (size != sizeof(*desc)) { 188 fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc)); 189 return -1; 190 } 191 buf = g_new0(char, size); 192 vub_iov_to_buf(iov, iovcnt, buf); 193 194 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 195 VubDev *vdev_blk = req->vdev_blk; 196 desc = (struct virtio_blk_discard_write_zeroes *)buf; 197 uint64_t range[2] = { le64toh(desc->sector) << 9, 198 le32toh(desc->num_sectors) << 9 }; 199 if (type == VIRTIO_BLK_T_DISCARD) { 200 if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { 201 g_free(buf); 202 return 0; 203 } 204 } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 205 if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) { 206 g_free(buf); 207 return 0; 208 } 209 } 210 #endif 211 212 g_free(buf); 213 return -1; 214 } 215 216 static void 217 vub_flush(VubReq *req) 218 { 219 VubDev *vdev_blk = req->vdev_blk; 220 221 fdatasync(vdev_blk->blk_fd); 222 } 223 224 static int vub_virtio_process_req(VubDev *vdev_blk, 225 VuVirtq *vq) 226 { 227 VugDev *gdev = &vdev_blk->parent; 228 VuDev *vu_dev = &gdev->parent; 229 VuVirtqElement *elem; 230 uint32_t type; 231 unsigned in_num; 232 unsigned out_num; 233 VubReq *req; 234 235 elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); 236 if (!elem) { 237 return -1; 238 } 239 240 /* refer to hw/block/virtio_blk.c */ 241 if (elem->out_num < 1 || elem->in_num < 1) { 242 fprintf(stderr, "virtio-blk request missing headers\n"); 243 free(elem); 244 return -1; 245 } 246 247 req = g_new0(VubReq, 1); 248 req->vdev_blk = vdev_blk; 249 req->vq = vq; 250 req->elem = elem; 251 252 in_num = elem->in_num; 253 out_num = elem->out_num; 254 255 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */ 256 if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) { 257 fprintf(stderr, "Invalid outhdr size\n"); 258 goto err; 259 } 260 req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base; 261 out_num--; 262 263 if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 264 fprintf(stderr, "Invalid inhdr size\n"); 265 goto err; 266 } 267 req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; 268 in_num--; 269 270 type = le32toh(req->out->type); 271 switch (type & ~VIRTIO_BLK_T_BARRIER) { 272 case VIRTIO_BLK_T_IN: 273 case VIRTIO_BLK_T_OUT: { 274 ssize_t ret = 0; 275 bool is_write = type & VIRTIO_BLK_T_OUT; 276 req->sector_num = le64toh(req->out->sector); 277 if (is_write) { 278 ret = vub_writev(req, &elem->out_sg[1], out_num); 279 } else { 280 ret = vub_readv(req, &elem->in_sg[0], in_num); 281 } 282 if (ret >= 0) { 283 req->in->status = VIRTIO_BLK_S_OK; 284 } else { 285 req->in->status = VIRTIO_BLK_S_IOERR; 286 } 287 vub_req_complete(req); 288 break; 289 } 290 case VIRTIO_BLK_T_FLUSH: 291 vub_flush(req); 292 req->in->status = VIRTIO_BLK_S_OK; 293 vub_req_complete(req); 294 break; 295 case VIRTIO_BLK_T_GET_ID: { 296 size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 297 VIRTIO_BLK_ID_BYTES); 298 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 299 req->in->status = VIRTIO_BLK_S_OK; 300 req->size = elem->in_sg[0].iov_len; 301 vub_req_complete(req); 302 break; 303 } 304 case VIRTIO_BLK_T_DISCARD: 305 case VIRTIO_BLK_T_WRITE_ZEROES: { 306 int rc; 307 rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type); 308 if (rc == 0) { 309 req->in->status = VIRTIO_BLK_S_OK; 310 } else { 311 req->in->status = VIRTIO_BLK_S_IOERR; 312 } 313 vub_req_complete(req); 314 break; 315 } 316 default: 317 req->in->status = VIRTIO_BLK_S_UNSUPP; 318 vub_req_complete(req); 319 break; 320 } 321 322 return 0; 323 324 err: 325 free(elem); 326 g_free(req); 327 return -1; 328 } 329 330 static void vub_process_vq(VuDev *vu_dev, int idx) 331 { 332 VugDev *gdev; 333 VubDev *vdev_blk; 334 VuVirtq *vq; 335 int ret; 336 337 if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { 338 fprintf(stderr, "VQ Index out of range: %d\n", idx); 339 vub_panic_cb(vu_dev, NULL); 340 return; 341 } 342 343 gdev = container_of(vu_dev, VugDev, parent); 344 vdev_blk = container_of(gdev, VubDev, parent); 345 assert(vdev_blk); 346 347 vq = vu_get_queue(vu_dev, idx); 348 assert(vq); 349 350 while (1) { 351 ret = vub_virtio_process_req(vdev_blk, vq); 352 if (ret) { 353 break; 354 } 355 } 356 } 357 358 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started) 359 { 360 VuVirtq *vq; 361 362 assert(vu_dev); 363 364 vq = vu_get_queue(vu_dev, idx); 365 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL); 366 } 367 368 static uint64_t 369 vub_get_features(VuDev *dev) 370 { 371 uint64_t features; 372 VugDev *gdev; 373 VubDev *vdev_blk; 374 375 gdev = container_of(dev, VugDev, parent); 376 vdev_blk = container_of(gdev, VubDev, parent); 377 378 features = 1ull << VIRTIO_BLK_F_SIZE_MAX | 379 1ull << VIRTIO_BLK_F_SEG_MAX | 380 1ull << VIRTIO_BLK_F_TOPOLOGY | 381 1ull << VIRTIO_BLK_F_BLK_SIZE | 382 1ull << VIRTIO_BLK_F_FLUSH | 383 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 384 1ull << VIRTIO_BLK_F_DISCARD | 385 1ull << VIRTIO_BLK_F_WRITE_ZEROES | 386 #endif 387 1ull << VIRTIO_BLK_F_CONFIG_WCE | 388 1ull << VIRTIO_F_VERSION_1 | 389 1ull << VHOST_USER_F_PROTOCOL_FEATURES; 390 391 if (vdev_blk->enable_ro) { 392 features |= 1ull << VIRTIO_BLK_F_RO; 393 } 394 395 return features; 396 } 397 398 static uint64_t 399 vub_get_protocol_features(VuDev *dev) 400 { 401 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG; 402 } 403 404 static int 405 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) 406 { 407 VugDev *gdev; 408 VubDev *vdev_blk; 409 410 gdev = container_of(vu_dev, VugDev, parent); 411 vdev_blk = container_of(gdev, VubDev, parent); 412 memcpy(config, &vdev_blk->blkcfg, len); 413 414 return 0; 415 } 416 417 static int 418 vub_set_config(VuDev *vu_dev, const uint8_t *data, 419 uint32_t offset, uint32_t size, uint32_t flags) 420 { 421 VugDev *gdev; 422 VubDev *vdev_blk; 423 uint8_t wce; 424 int fd; 425 426 /* don't support live migration */ 427 if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { 428 return -1; 429 } 430 431 gdev = container_of(vu_dev, VugDev, parent); 432 vdev_blk = container_of(gdev, VubDev, parent); 433 434 if (offset != offsetof(struct virtio_blk_config, wce) || 435 size != 1) { 436 return -1; 437 } 438 439 wce = *data; 440 if (wce == vdev_blk->blkcfg.wce) { 441 /* Do nothing as same with old configuration */ 442 return 0; 443 } 444 445 vdev_blk->blkcfg.wce = wce; 446 fprintf(stdout, "Write Cache Policy Changed\n"); 447 if (vdev_blk->blk_fd >= 0) { 448 close(vdev_blk->blk_fd); 449 vdev_blk->blk_fd = -1; 450 } 451 452 fd = vub_open(vdev_blk->blk_name, wce); 453 if (fd < 0) { 454 fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name); 455 vdev_blk->blk_fd = -1; 456 return -1; 457 } 458 vdev_blk->blk_fd = fd; 459 460 return 0; 461 } 462 463 static const VuDevIface vub_iface = { 464 .get_features = vub_get_features, 465 .queue_set_started = vub_queue_set_started, 466 .get_protocol_features = vub_get_protocol_features, 467 .get_config = vub_get_config, 468 .set_config = vub_set_config, 469 }; 470 471 static int unix_sock_new(char *unix_fn) 472 { 473 int sock; 474 struct sockaddr_un un; 475 size_t len; 476 477 assert(unix_fn); 478 479 sock = socket(AF_UNIX, SOCK_STREAM, 0); 480 if (sock <= 0) { 481 perror("socket"); 482 return -1; 483 } 484 485 un.sun_family = AF_UNIX; 486 (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); 487 len = sizeof(un.sun_family) + strlen(un.sun_path); 488 489 (void)unlink(unix_fn); 490 if (bind(sock, (struct sockaddr *)&un, len) < 0) { 491 perror("bind"); 492 goto fail; 493 } 494 495 if (listen(sock, 1) < 0) { 496 perror("listen"); 497 goto fail; 498 } 499 500 return sock; 501 502 fail: 503 (void)close(sock); 504 505 return -1; 506 } 507 508 static void vub_free(struct VubDev *vdev_blk) 509 { 510 if (!vdev_blk) { 511 return; 512 } 513 514 g_main_loop_unref(vdev_blk->loop); 515 if (vdev_blk->blk_fd >= 0) { 516 close(vdev_blk->blk_fd); 517 } 518 g_free(vdev_blk); 519 } 520 521 static uint32_t 522 vub_get_blocksize(int fd) 523 { 524 uint32_t blocksize = 512; 525 526 #if defined(__linux__) && defined(BLKSSZGET) 527 if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { 528 return blocksize; 529 } 530 #endif 531 532 return blocksize; 533 } 534 535 static void 536 vub_initialize_config(int fd, struct virtio_blk_config *config) 537 { 538 off64_t capacity; 539 540 capacity = lseek64(fd, 0, SEEK_END); 541 config->capacity = capacity >> 9; 542 config->blk_size = vub_get_blocksize(fd); 543 config->size_max = 65536; 544 config->seg_max = 128 - 2; 545 config->min_io_size = 1; 546 config->opt_io_size = 1; 547 config->num_queues = 1; 548 #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 549 config->max_discard_sectors = 32768; 550 config->max_discard_seg = 1; 551 config->discard_sector_alignment = config->blk_size >> 9; 552 config->max_write_zeroes_sectors = 32768; 553 config->max_write_zeroes_seg = 1; 554 #endif 555 } 556 557 static VubDev * 558 vub_new(char *blk_file) 559 { 560 VubDev *vdev_blk; 561 562 vdev_blk = g_new0(VubDev, 1); 563 vdev_blk->loop = g_main_loop_new(NULL, FALSE); 564 vdev_blk->blk_fd = vub_open(blk_file, 0); 565 if (vdev_blk->blk_fd < 0) { 566 fprintf(stderr, "Error to open block device %s\n", blk_file); 567 vub_free(vdev_blk); 568 return NULL; 569 } 570 vdev_blk->enable_ro = false; 571 vdev_blk->blkcfg.wce = 0; 572 vdev_blk->blk_name = blk_file; 573 574 /* fill virtio_blk_config with block parameters */ 575 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg); 576 577 return vdev_blk; 578 } 579 580 int main(int argc, char **argv) 581 { 582 int opt; 583 char *unix_socket = NULL; 584 char *blk_file = NULL; 585 bool enable_ro = false; 586 int lsock = -1, csock = -1; 587 VubDev *vdev_blk = NULL; 588 589 while ((opt = getopt(argc, argv, "b:rs:h")) != -1) { 590 switch (opt) { 591 case 'b': 592 blk_file = g_strdup(optarg); 593 break; 594 case 's': 595 unix_socket = g_strdup(optarg); 596 break; 597 case 'r': 598 enable_ro = true; 599 break; 600 case 'h': 601 default: 602 printf("Usage: %s [ -b block device or file, -s UNIX domain socket" 603 " | -r Enable read-only ] | [ -h ]\n", argv[0]); 604 return 0; 605 } 606 } 607 608 if (!unix_socket || !blk_file) { 609 printf("Usage: %s [ -b block device or file, -s UNIX domain socket" 610 " | -r Enable read-only ] | [ -h ]\n", argv[0]); 611 return -1; 612 } 613 614 lsock = unix_sock_new(unix_socket); 615 if (lsock < 0) { 616 goto err; 617 } 618 619 csock = accept(lsock, (void *)0, (void *)0); 620 if (csock < 0) { 621 fprintf(stderr, "Accept error %s\n", strerror(errno)); 622 goto err; 623 } 624 625 vdev_blk = vub_new(blk_file); 626 if (!vdev_blk) { 627 goto err; 628 } 629 if (enable_ro) { 630 vdev_blk->enable_ro = true; 631 } 632 633 vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface); 634 635 g_main_loop_run(vdev_blk->loop); 636 637 vug_deinit(&vdev_blk->parent); 638 639 err: 640 vub_free(vdev_blk); 641 if (csock >= 0) { 642 close(csock); 643 } 644 if (lsock >= 0) { 645 close(lsock); 646 } 647 g_free(unix_socket); 648 g_free(blk_file); 649 650 return 0; 651 } 652