/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"

#if defined(__linux__)
#include <linux/fs.h>
#include <sys/ioctl.h>
#endif

struct virtio_blk_inhdr {
    unsigned char status;
};

/* vhost user block device */
typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
} VubDev;

typedef struct VubReq {
    VuVirtqElement *elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    VubDev *vdev_blk;
    struct VuVirtq *vq;
} VubReq;

/* refer util/iov.c */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}

static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    g_main_loop_quit(vdev_blk->loop);
}

static void vub_req_complete(VubReq *req)
{
    VugDev *gdev = &req->vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem,
                  req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    if (req->elem) {
        free(req->elem);
    }

    g_free(req);
}

static int vub_open(const char *file_name, bool wce)
{
    int fd;
    int flags = O_RDWR;

    if (!wce) {
        flags |= O_DIRECT;
    }

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}

static ssize_t
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Read IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static ssize_t
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Write IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static void
vub_flush(VubReq *req)
{
    VubDev *vdev_blk = req->vdev_blk;

    fdatasync(vdev_blk->blk_fd);
}

static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
        if (is_write) {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        } else {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH: {
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        vub_req_complete(req);
        break;
    }
    default: {
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}

static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    VuVirtq *vq;
    int ret;

    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
        fprintf(stderr, "VQ Index out of range: %d\n", idx);
        vub_panic_cb(vu_dev, NULL);
        return;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    while (1) {
        ret = vub_virtio_process_req(vdev_blk, vq);
        if (ret) {
            break;
        }
    }
}

static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
}

static uint64_t
vub_get_features(VuDev *dev)
{
    uint64_t features;
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (vdev_blk->enable_ro) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t
vub_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

static int
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);

    return 0;
}

static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* Do nothing as same with old configuration */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}

static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};

static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock <= 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}

static void vub_free(struct VubDev *vdev_blk)
{
    if (!vdev_blk) {
        return;
    }

    g_main_loop_unref(vdev_blk->loop);
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
    }
    g_free(vdev_blk);
}

static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}

static void
vub_initialize_config(int fd, struct virtio_blk_config *config)
{
    off64_t capacity;

    capacity = lseek64(fd, 0, SEEK_END);
    config->capacity = capacity >> 9;
    config->blk_size = vub_get_blocksize(fd);
    config->size_max = 65536;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
}

static VubDev *
vub_new(char *blk_file)
{
    VubDev *vdev_blk;

    vdev_blk = g_new0(VubDev, 1);
    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    vdev_blk->blk_fd = vub_open(blk_file, 0);
    if (vdev_blk->blk_fd < 0) {
        fprintf(stderr, "Error to open block device %s\n", blk_file);
        vub_free(vdev_blk);
        return NULL;
    }
    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

    /* fill virtio_blk_config with block parameters */
    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);

    return vdev_blk;
}

int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    csock = accept(lsock, (void *)0, (void *)0);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}
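
/*
 * Usage sketch (not part of the original sample): the commands below show
 * one way this backend is typically wired up to a guest. The image and
 * socket paths are placeholders, and the QEMU command line assumes a build
 * with vhost-user-blk-pci support plus a shared-memory RAM backend, which
 * the vhost-user protocol requires.
 *
 * Start the backend, exporting a raw image over a UNIX domain socket:
 *
 *   ./vhost-user-blk -b /path/to/disk.img -s /tmp/vhost-user-blk.sock
 *
 * Then attach a guest to it, roughly as follows:
 *
 *   qemu-system-x86_64 ... \
 *       -object memory-backend-file,id=mem,size=1G,mem-path=/dev/shm,share=on \
 *       -numa node,memdev=mem \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0
 */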