/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"

#if defined(__linux__)
#include <linux/fs.h>
#include <sys/ioctl.h>
#endif

struct virtio_blk_inhdr {
    unsigned char status;
};

/* vhost-user block device */
typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
} VubDev;

typedef struct VubReq {
    VuVirtqElement *elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    VubDev *vdev_blk;
    struct VuVirtq *vq;
} VubReq;

/* refer to util/iov.c */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}

static size_t vub_iov_to_buf(const struct iovec *iov,
                             const unsigned int iov_cnt, void *buf)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        memcpy(buf + len, iov[i].iov_base, iov[i].iov_len);
        len += iov[i].iov_len;
    }
    return len;
}

static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    g_main_loop_quit(vdev_blk->loop);
}

static void vub_req_complete(VubReq *req)
{
    VugDev *gdev = &req->vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem,
                  req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    if (req->elem) {
        free(req->elem);
    }

    g_free(req);
}

static int vub_open(const char *file_name, bool wce)
{
    int fd;
    int flags = O_RDWR;

    /* bypass the host page cache when the write cache is disabled */
    if (!wce) {
        flags |= O_DIRECT;
    }

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}

static ssize_t
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Read IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static ssize_t
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Write IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static int
vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
                         uint32_t type)
{
    struct virtio_blk_discard_write_zeroes *desc;
    ssize_t size;
    void *buf;

    size = vub_iov_size(iov, iovcnt);
    if (size != sizeof(*desc)) {
        fprintf(stderr, "Invalid size %zd, expect %zu\n", size, sizeof(*desc));
        return -1;
    }
    buf = g_new0(char, size);
    vub_iov_to_buf(iov, iovcnt, buf);

#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    VubDev *vdev_blk = req->vdev_blk;
    desc = (struct virtio_blk_discard_write_zeroes *)buf;
    /* the ioctl range is { offset in bytes, length in bytes } */
    uint64_t range[2] = { le64toh(desc->sector) << 9,
                          (uint64_t)le32toh(desc->num_sectors) << 9 };
    if (type == VIRTIO_BLK_T_DISCARD) {
        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
            g_free(buf);
            return 0;
        }
    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
            g_free(buf);
            return 0;
        }
    }
#endif

    g_free(buf);
    return -1;
}

static void
vub_flush(VubReq *req)
{
    VubDev *vdev_blk = req->vdev_blk;

    fdatasync(vdev_blk->blk_fd);
}

static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* we don't support VIRTIO_F_ANY_LAYOUT; assume the virtio 1.0 layout */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~VIRTIO_BLK_T_BARRIER) {
    case VIRTIO_BLK_T_IN:
    case VIRTIO_BLK_T_OUT: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
        if (is_write) {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        } else {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH:
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_DISCARD:
    case VIRTIO_BLK_T_WRITE_ZEROES: {
        int rc;
        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
        if (rc == 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    default:
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}

static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    VuVirtq *vq;
    int ret;

    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
        fprintf(stderr, "VQ Index out of range: %d\n", idx);
        vub_panic_cb(vu_dev, NULL);
        return;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    while (1) {
        ret = vub_virtio_process_req(vdev_blk, vq);
        if (ret) {
            break;
        }
    }
}

static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
}

static uint64_t
vub_get_features(VuDev *dev)
{
    uint64_t features;
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
               1ull << VIRTIO_BLK_F_DISCARD |
               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
#endif
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (vdev_blk->enable_ro) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t
vub_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
}

static int
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);

    return 0;
}

static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    /* only the write cache enable byte may be changed */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* nothing to do: the write cache policy is unchanged */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    /* reopen the backing file so the open flags match the new cache policy */
    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n", vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}

static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};

static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}

static void vub_free(struct VubDev *vdev_blk)
{
    if (!vdev_blk) {
        return;
    }

    g_main_loop_unref(vdev_blk->loop);
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
    }
    g_free(vdev_blk);
}

static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}

static void
vub_initialize_config(int fd, struct virtio_blk_config *config)
{
    off64_t capacity;

    capacity = lseek64(fd, 0, SEEK_END);
    config->capacity = capacity >> 9;
    config->blk_size = vub_get_blocksize(fd);
    config->size_max = 65536;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    config->max_discard_sectors = 32768;
    config->max_discard_seg = 1;
    config->discard_sector_alignment = config->blk_size >> 9;
    config->max_write_zeroes_sectors = 32768;
    config->max_write_zeroes_seg = 1;
#endif
}

static VubDev *
vub_new(char *blk_file)
{
    VubDev *vdev_blk;

    vdev_blk = g_new0(VubDev, 1);
    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    vdev_blk->blk_fd = vub_open(blk_file, 0);
    if (vdev_blk->blk_fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n", blk_file);
        vub_free(vdev_blk);
        return NULL;
    }
    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

    /* fill virtio_blk_config with block parameters */
    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);

    return vdev_blk;
}

int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s -b <block device or file>"
                   " -s <UNIX domain socket> [ -r Enable read-only ] [ -h ]\n",
                   argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s -b <block device or file>"
               " -s <UNIX domain socket> [ -r Enable read-only ] [ -h ]\n",
               argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    csock = accept(lsock, (void *)0, (void *)0);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}
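
/*
 * Example usage (a minimal sketch, not part of the original sample: the image
 * path, socket path, and QEMU command line below are illustrative assumptions
 * about how this backend is typically connected to a guest).
 *
 * Start the backend, exporting disk.img over a UNIX domain socket:
 *
 *   ./vhost-user-blk -b disk.img -s /tmp/vhost-user-blk.sock
 *
 * Then attach a guest to it. vhost-user requires the guest memory to be
 * shareable with the backend, e.g. via a memfd memory backend:
 *
 *   qemu-system-x86_64 ... \
 *       -object memory-backend-memfd,id=mem,size=1G,share=on \
 *       -numa node,memdev=mem \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0
 */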