1 /* 2 * vhost-user-blk sample application 3 * 4 * Copyright (c) 2017 Intel Corporation. All rights reserved. 5 * 6 * Author: 7 * Changpeng Liu <changpeng.liu@intel.com> 8 * 9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver 10 * implementation by: 11 * Felipe Franciosi <felipe@nutanix.com> 12 * Anthony Liguori <aliguori@us.ibm.com> 13 * 14 * This work is licensed under the terms of the GNU GPL, version 2 only. 15 * See the COPYING file in the top-level directory. 16 */ 17 18 #include "qemu/osdep.h" 19 #include "standard-headers/linux/virtio_blk.h" 20 #include "contrib/libvhost-user/libvhost-user-glib.h" 21 #include "contrib/libvhost-user/libvhost-user.h" 22 23 #include <glib.h> 24 25 struct virtio_blk_inhdr { 26 unsigned char status; 27 }; 28 29 /* vhost user block device */ 30 typedef struct VubDev { 31 VugDev parent; 32 int blk_fd; 33 struct virtio_blk_config blkcfg; 34 char *blk_name; 35 GMainLoop *loop; 36 } VubDev; 37 38 typedef struct VubReq { 39 VuVirtqElement *elem; 40 int64_t sector_num; 41 size_t size; 42 struct virtio_blk_inhdr *in; 43 struct virtio_blk_outhdr *out; 44 VubDev *vdev_blk; 45 struct VuVirtq *vq; 46 } VubReq; 47 48 /* refer util/iov.c */ 49 static size_t vub_iov_size(const struct iovec *iov, 50 const unsigned int iov_cnt) 51 { 52 size_t len; 53 unsigned int i; 54 55 len = 0; 56 for (i = 0; i < iov_cnt; i++) { 57 len += iov[i].iov_len; 58 } 59 return len; 60 } 61 62 static void vub_panic_cb(VuDev *vu_dev, const char *buf) 63 { 64 VugDev *gdev; 65 VubDev *vdev_blk; 66 67 assert(vu_dev); 68 69 gdev = container_of(vu_dev, VugDev, parent); 70 vdev_blk = container_of(gdev, VubDev, parent); 71 if (buf) { 72 g_warning("vu_panic: %s", buf); 73 } 74 75 g_main_loop_quit(vdev_blk->loop); 76 } 77 78 static void vub_req_complete(VubReq *req) 79 { 80 VugDev *gdev = &req->vdev_blk->parent; 81 VuDev *vu_dev = &gdev->parent; 82 83 /* IO size with 1 extra status byte */ 84 vu_queue_push(vu_dev, req->vq, req->elem, 85 req->size + 1); 86 vu_queue_notify(vu_dev, req->vq); 87 88 if (req->elem) { 89 free(req->elem); 90 } 91 92 g_free(req); 93 } 94 95 static int vub_open(const char *file_name, bool wce) 96 { 97 int fd; 98 int flags = O_RDWR; 99 100 if (!wce) { 101 flags |= O_DIRECT; 102 } 103 104 fd = open(file_name, flags); 105 if (fd < 0) { 106 fprintf(stderr, "Cannot open file %s, %s\n", file_name, 107 strerror(errno)); 108 return -1; 109 } 110 111 return fd; 112 } 113 114 static ssize_t 115 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) 116 { 117 VubDev *vdev_blk = req->vdev_blk; 118 ssize_t rc; 119 120 if (!iovcnt) { 121 fprintf(stderr, "Invalid Read IOV count\n"); 122 return -1; 123 } 124 125 req->size = vub_iov_size(iov, iovcnt); 126 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 127 if (rc < 0) { 128 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 129 vdev_blk->blk_name, req->sector_num, req->size, 130 strerror(errno)); 131 return -1; 132 } 133 134 return rc; 135 } 136 137 static ssize_t 138 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) 139 { 140 VubDev *vdev_blk = req->vdev_blk; 141 ssize_t rc; 142 143 if (!iovcnt) { 144 fprintf(stderr, "Invalid Write IOV count\n"); 145 return -1; 146 } 147 148 req->size = vub_iov_size(iov, iovcnt); 149 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); 150 if (rc < 0) { 151 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", 152 vdev_blk->blk_name, req->sector_num, req->size, 153 strerror(errno)); 154 return -1; 155 } 156 157 return rc; 158 } 159 160 static void 161 vub_flush(VubReq *req) 162 { 163 VubDev *vdev_blk = req->vdev_blk; 164 165 fdatasync(vdev_blk->blk_fd); 166 } 167 168 static int vub_virtio_process_req(VubDev *vdev_blk, 169 VuVirtq *vq) 170 { 171 VugDev *gdev = &vdev_blk->parent; 172 VuDev *vu_dev = &gdev->parent; 173 VuVirtqElement *elem; 174 uint32_t type; 175 unsigned in_num; 176 unsigned out_num; 177 VubReq *req; 178 179 elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); 180 if (!elem) { 181 return -1; 182 } 183 184 /* refer to hw/block/virtio_blk.c */ 185 if (elem->out_num < 1 || elem->in_num < 1) { 186 fprintf(stderr, "virtio-blk request missing headers\n"); 187 free(elem); 188 return -1; 189 } 190 191 req = g_new0(VubReq, 1); 192 req->vdev_blk = vdev_blk; 193 req->vq = vq; 194 req->elem = elem; 195 196 in_num = elem->in_num; 197 out_num = elem->out_num; 198 199 /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */ 200 if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) { 201 fprintf(stderr, "Invalid outhdr size\n"); 202 goto err; 203 } 204 req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base; 205 out_num--; 206 207 if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { 208 fprintf(stderr, "Invalid inhdr size\n"); 209 goto err; 210 } 211 req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; 212 in_num--; 213 214 type = le32toh(req->out->type); 215 switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { 216 case VIRTIO_BLK_T_IN: { 217 ssize_t ret = 0; 218 bool is_write = type & VIRTIO_BLK_T_OUT; 219 req->sector_num = le64toh(req->out->sector); 220 if (is_write) { 221 ret = vub_writev(req, &elem->out_sg[1], out_num); 222 } else { 223 ret = vub_readv(req, &elem->in_sg[0], in_num); 224 } 225 if (ret >= 0) { 226 req->in->status = VIRTIO_BLK_S_OK; 227 } else { 228 req->in->status = VIRTIO_BLK_S_IOERR; 229 } 230 vub_req_complete(req); 231 break; 232 } 233 case VIRTIO_BLK_T_FLUSH: { 234 vub_flush(req); 235 req->in->status = VIRTIO_BLK_S_OK; 236 vub_req_complete(req); 237 break; 238 } 239 case VIRTIO_BLK_T_GET_ID: { 240 size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 241 VIRTIO_BLK_ID_BYTES); 242 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 243 req->in->status = VIRTIO_BLK_S_OK; 244 req->size = elem->in_sg[0].iov_len; 245 vub_req_complete(req); 246 break; 247 } 248 default: { 249 req->in->status = VIRTIO_BLK_S_UNSUPP; 250 vub_req_complete(req); 251 break; 252 } 253 } 254 255 return 0; 256 257 err: 258 free(elem); 259 g_free(req); 260 return -1; 261 } 262 263 static void vub_process_vq(VuDev *vu_dev, int idx) 264 { 265 VugDev *gdev; 266 VubDev *vdev_blk; 267 VuVirtq *vq; 268 int ret; 269 270 if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { 271 fprintf(stderr, "VQ Index out of range: %d\n", idx); 272 vub_panic_cb(vu_dev, NULL); 273 return; 274 } 275 276 gdev = container_of(vu_dev, VugDev, parent); 277 vdev_blk = container_of(gdev, VubDev, parent); 278 assert(vdev_blk); 279 280 vq = vu_get_queue(vu_dev, idx); 281 assert(vq); 282 283 while (1) { 284 ret = vub_virtio_process_req(vdev_blk, vq); 285 if (ret) { 286 break; 287 } 288 } 289 } 290 291 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started) 292 { 293 VuVirtq *vq; 294 295 assert(vu_dev); 296 297 vq = vu_get_queue(vu_dev, idx); 298 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL); 299 } 300 301 static uint64_t 302 vub_get_features(VuDev *dev) 303 { 304 return 1ull << VIRTIO_BLK_F_SIZE_MAX | 305 1ull << VIRTIO_BLK_F_SEG_MAX | 306 1ull << VIRTIO_BLK_F_TOPOLOGY | 307 1ull << VIRTIO_BLK_F_BLK_SIZE | 308 1ull << VIRTIO_BLK_F_FLUSH | 309 1ull << VIRTIO_BLK_F_CONFIG_WCE | 310 1ull << VIRTIO_F_VERSION_1 | 311 1ull << VHOST_USER_F_PROTOCOL_FEATURES; 312 } 313 314 static int 315 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) 316 { 317 VugDev *gdev; 318 VubDev *vdev_blk; 319 320 gdev = container_of(vu_dev, VugDev, parent); 321 vdev_blk = container_of(gdev, VubDev, parent); 322 memcpy(config, &vdev_blk->blkcfg, len); 323 324 return 0; 325 } 326 327 static int 328 vub_set_config(VuDev *vu_dev, const uint8_t *data, 329 uint32_t offset, uint32_t size, uint32_t flags) 330 { 331 VugDev *gdev; 332 VubDev *vdev_blk; 333 uint8_t wce; 334 int fd; 335 336 /* don't support live migration */ 337 if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { 338 return -1; 339 } 340 341 gdev = container_of(vu_dev, VugDev, parent); 342 vdev_blk = container_of(gdev, VubDev, parent); 343 344 if (offset != offsetof(struct virtio_blk_config, wce) || 345 size != 1) { 346 return -1; 347 } 348 349 wce = *data; 350 if (wce == vdev_blk->blkcfg.wce) { 351 /* Do nothing as same with old configuration */ 352 return 0; 353 } 354 355 vdev_blk->blkcfg.wce = wce; 356 fprintf(stdout, "Write Cache Policy Changed\n"); 357 if (vdev_blk->blk_fd >= 0) { 358 close(vdev_blk->blk_fd); 359 vdev_blk->blk_fd = -1; 360 } 361 362 fd = vub_open(vdev_blk->blk_name, wce); 363 if (fd < 0) { 364 fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name); 365 vdev_blk->blk_fd = -1; 366 return -1; 367 } 368 vdev_blk->blk_fd = fd; 369 370 return 0; 371 } 372 373 static const VuDevIface vub_iface = { 374 .get_features = vub_get_features, 375 .queue_set_started = vub_queue_set_started, 376 .get_config = vub_get_config, 377 .set_config = vub_set_config, 378 }; 379 380 static int unix_sock_new(char *unix_fn) 381 { 382 int sock; 383 struct sockaddr_un un; 384 size_t len; 385 386 assert(unix_fn); 387 388 sock = socket(AF_UNIX, SOCK_STREAM, 0); 389 if (sock <= 0) { 390 perror("socket"); 391 return -1; 392 } 393 394 un.sun_family = AF_UNIX; 395 (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); 396 len = sizeof(un.sun_family) + strlen(un.sun_path); 397 398 (void)unlink(unix_fn); 399 if (bind(sock, (struct sockaddr *)&un, len) < 0) { 400 perror("bind"); 401 goto fail; 402 } 403 404 if (listen(sock, 1) < 0) { 405 perror("listen"); 406 goto fail; 407 } 408 409 return sock; 410 411 fail: 412 (void)close(sock); 413 414 return -1; 415 } 416 417 static void vub_free(struct VubDev *vdev_blk) 418 { 419 if (!vdev_blk) { 420 return; 421 } 422 423 g_main_loop_unref(vdev_blk->loop); 424 if (vdev_blk->blk_fd >= 0) { 425 close(vdev_blk->blk_fd); 426 } 427 g_free(vdev_blk); 428 } 429 430 static uint32_t 431 vub_get_blocksize(int fd) 432 { 433 uint32_t blocksize = 512; 434 435 #if defined(__linux__) && defined(BLKSSZGET) 436 if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { 437 return blocklen; 438 } 439 #endif 440 441 return blocksize; 442 } 443 444 static void 445 vub_initialize_config(int fd, struct virtio_blk_config *config) 446 { 447 off64_t capacity; 448 449 capacity = lseek64(fd, 0, SEEK_END); 450 config->capacity = capacity >> 9; 451 config->blk_size = vub_get_blocksize(fd); 452 config->size_max = 65536; 453 config->seg_max = 128 - 2; 454 config->min_io_size = 1; 455 config->opt_io_size = 1; 456 config->num_queues = 1; 457 } 458 459 static VubDev * 460 vub_new(char *blk_file) 461 { 462 VubDev *vdev_blk; 463 464 vdev_blk = g_new0(VubDev, 1); 465 vdev_blk->loop = g_main_loop_new(NULL, FALSE); 466 vdev_blk->blk_fd = vub_open(blk_file, 0); 467 if (vdev_blk->blk_fd < 0) { 468 fprintf(stderr, "Error to open block device %s\n", blk_file); 469 vub_free(vdev_blk); 470 return NULL; 471 } 472 vdev_blk->blkcfg.wce = 0; 473 vdev_blk->blk_name = blk_file; 474 475 /* fill virtio_blk_config with block parameters */ 476 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg); 477 478 return vdev_blk; 479 } 480 481 int main(int argc, char **argv) 482 { 483 int opt; 484 char *unix_socket = NULL; 485 char *blk_file = NULL; 486 int lsock = -1, csock = -1; 487 VubDev *vdev_blk = NULL; 488 489 while ((opt = getopt(argc, argv, "b:s:h")) != -1) { 490 switch (opt) { 491 case 'b': 492 blk_file = g_strdup(optarg); 493 break; 494 case 's': 495 unix_socket = g_strdup(optarg); 496 break; 497 case 'h': 498 default: 499 printf("Usage: %s [-b block device or file, -s UNIX domain socket]" 500 " | [ -h ]\n", argv[0]); 501 return 0; 502 } 503 } 504 505 if (!unix_socket || !blk_file) { 506 printf("Usage: %s [-b block device or file, -s UNIX domain socket] |" 507 " [ -h ]\n", argv[0]); 508 return -1; 509 } 510 511 lsock = unix_sock_new(unix_socket); 512 if (lsock < 0) { 513 goto err; 514 } 515 516 csock = accept(lsock, (void *)0, (void *)0); 517 if (csock < 0) { 518 fprintf(stderr, "Accept error %s\n", strerror(errno)); 519 goto err; 520 } 521 522 vdev_blk = vub_new(blk_file); 523 if (!vdev_blk) { 524 goto err; 525 } 526 527 vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface); 528 529 g_main_loop_run(vdev_blk->loop); 530 531 vug_deinit(&vdev_blk->parent); 532 533 err: 534 vub_free(vdev_blk); 535 if (csock >= 0) { 536 close(csock); 537 } 538 if (lsock >= 0) { 539 close(lsock); 540 } 541 g_free(unix_socket); 542 g_free(blk_file); 543 544 return 0; 545 } 546