/*
 * vhost-user-blk sample application
 *
 * Copyright (c) 2017 Intel Corporation. All rights reserved.
 *
 * Author:
 *  Changpeng Liu <changpeng.liu@intel.com>
 *
 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
 * implementation by:
 *  Felipe Franciosi <felipe@nutanix.com>
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 only.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"

#include <glib.h>

struct virtio_blk_inhdr {
    unsigned char status;
};

/* vhost-user block device */
typedef struct VubDev {
    VugDev parent;
    int blk_fd;
    struct virtio_blk_config blkcfg;
    bool enable_ro;
    char *blk_name;
    GMainLoop *loop;
} VubDev;

typedef struct VubReq {
    VuVirtqElement *elem;
    int64_t sector_num;
    size_t size;
    struct virtio_blk_inhdr *in;
    struct virtio_blk_outhdr *out;
    VubDev *vdev_blk;
    struct VuVirtq *vq;
} VubReq;

/* Refer to util/iov.c. */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    size_t len;
    unsigned int i;

    len = 0;
    for (i = 0; i < iov_cnt; i++) {
        len += iov[i].iov_len;
    }
    return len;
}

static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    assert(vu_dev);

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    if (buf) {
        g_warning("vu_panic: %s", buf);
    }

    g_main_loop_quit(vdev_blk->loop);
}

static void vub_req_complete(VubReq *req)
{
    VugDev *gdev = &req->vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;

    /* IO size with 1 extra status byte */
    vu_queue_push(vu_dev, req->vq, req->elem,
                  req->size + 1);
    vu_queue_notify(vu_dev, req->vq);

    if (req->elem) {
        free(req->elem);
    }

    g_free(req);
}

static int vub_open(const char *file_name, bool wce)
{
    int fd;
    int flags = O_RDWR;

    /* Bypass the page cache when the write cache is disabled. */
    if (!wce) {
        flags |= O_DIRECT;
    }

    fd = open(file_name, flags);
    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}

static ssize_t
vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Read IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static ssize_t
vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
{
    VubDev *vdev_blk = req->vdev_blk;
    ssize_t rc;

    if (!iovcnt) {
        fprintf(stderr, "Invalid Write IOV count\n");
        return -1;
    }

    req->size = vub_iov_size(iov, iovcnt);
    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    if (rc < 0) {
        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
                vdev_blk->blk_name, req->sector_num, req->size,
                strerror(errno));
        return -1;
    }

    return rc;
}

static void
vub_flush(VubReq *req)
{
    VubDev *vdev_blk = req->vdev_blk;

    fdatasync(vdev_blk->blk_fd);
}

static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* Refer to hw/block/virtio-blk.c. */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* We don't support VIRTIO_F_ANY_LAYOUT; only the virtio 1.0 layout. */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    type = le32toh(req->out->type);
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
        if (is_write) {
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        } else {
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH: {
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_GET_ID: {
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        vub_req_complete(req);
        break;
    }
    default: {
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}

static void vub_process_vq(VuDev *vu_dev, int idx)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    VuVirtq *vq;
    int ret;

    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
        fprintf(stderr, "VQ Index out of range: %d\n", idx);
        vub_panic_cb(vu_dev, NULL);
        return;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    assert(vdev_blk);

    vq = vu_get_queue(vu_dev, idx);
    assert(vq);

    /* Drain the virtqueue until no more requests are available. */
    while (1) {
        ret = vub_virtio_process_req(vdev_blk, vq);
        if (ret) {
            break;
        }
    }
}

static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
{
    VuVirtq *vq;

    assert(vu_dev);

    vq = vu_get_queue(vu_dev, idx);
    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
}

static uint64_t
vub_get_features(VuDev *dev)
{
    uint64_t features;
    VugDev *gdev;
    VubDev *vdev_blk;

    gdev = container_of(dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
               1ull << VIRTIO_BLK_F_SEG_MAX |
               1ull << VIRTIO_BLK_F_TOPOLOGY |
               1ull << VIRTIO_BLK_F_BLK_SIZE |
               1ull << VIRTIO_BLK_F_FLUSH |
               1ull << VIRTIO_BLK_F_CONFIG_WCE |
               1ull << VIRTIO_F_VERSION_1 |
               1ull << VHOST_USER_F_PROTOCOL_FEATURES;

    if (vdev_blk->enable_ro) {
        features |= 1ull << VIRTIO_BLK_F_RO;
    }

    return features;
}

static uint64_t
vub_get_protocol_features(VuDev *dev)
{
    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
}

static int
vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
{
    VugDev *gdev;
    VubDev *vdev_blk;

    /* Never copy more than the size of the config space. */
    if (len > sizeof(struct virtio_blk_config)) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);
    memcpy(config, &vdev_blk->blkcfg, len);

    return 0;
}

static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* Live migration is not supported. */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    /* Only the one-byte write-cache-enable field may be changed. */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* Nothing to do: same as the old configuration. */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    /* Reopen the backing file so O_DIRECT matches the new cache policy. */
    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n",
                vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}

static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};

static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return -1;
    }

    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}

static void vub_free(struct VubDev *vdev_blk)
{
    if (!vdev_blk) {
        return;
    }

    g_main_loop_unref(vdev_blk->loop);
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
    }
    g_free(vdev_blk);
}

static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;
    }
#endif

    return blocksize;
}

static void
vub_initialize_config(int fd, struct virtio_blk_config *config)
{
    off64_t capacity;

    capacity = lseek64(fd, 0, SEEK_END);
    config->capacity = capacity >> 9;
    config->blk_size = vub_get_blocksize(fd);
    config->size_max = 65536;
    config->seg_max = 128 - 2;
    config->min_io_size = 1;
    config->opt_io_size = 1;
    config->num_queues = 1;
}

static VubDev *
vub_new(char *blk_file)
{
    VubDev *vdev_blk;

    vdev_blk = g_new0(VubDev, 1);
    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    vdev_blk->blk_fd = vub_open(blk_file, 0);
    if (vdev_blk->blk_fd < 0) {
        fprintf(stderr, "Failed to open block device %s\n", blk_file);
        vub_free(vdev_blk);
        return NULL;
    }
    vdev_blk->enable_ro = false;
    vdev_blk->blkcfg.wce = 0;
    vdev_blk->blk_name = blk_file;

    /* Fill virtio_blk_config with the block parameters. */
    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);

    return vdev_blk;
}

int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;
    char *blk_file = NULL;
    bool enable_ro = false;
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s -b <block device or file> "
                   "-s <UNIX domain socket> [-r] [-h]\n", argv[0]);
            return 0;
        }
    }

    if (!unix_socket || !blk_file) {
        printf("Usage: %s -b <block device or file> "
               "-s <UNIX domain socket> [-r] [-h]\n", argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    csock = accept(lsock, NULL, NULL);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}
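
/*
 * Example usage, for illustration only. The option names come from the
 * getopt() loop above; the image and socket paths are placeholders:
 *
 *   vhost-user-blk -b /path/to/disk.img -s /tmp/vhost-user-blk.sock
 *
 * -b names the backing block device or image file, -s the UNIX domain
 * socket to listen on, and -r exports the device read-only.
 *
 * A guest can then be connected over that socket. With QEMU this would
 * look roughly like the sketch below; the exact options depend on the
 * QEMU version and machine setup, so treat it as an assumption rather
 * than a verified command line:
 *
 *   qemu-system-x86_64 ... \
 *       -object memory-backend-file,id=mem,size=1G,mem-path=/dev/shm,share=on \
 *       -numa node,memdev=mem \
 *       -chardev socket,id=char0,path=/tmp/vhost-user-blk.sock \
 *       -device vhost-user-blk-pci,chardev=char0
 */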