1 /* 2 * Vhost User Bridge 3 * 4 * Copyright (c) 2015 Red Hat, Inc. 5 * 6 * Authors: 7 * Victor Kaplansky <victork@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or 10 * later. See the COPYING file in the top-level directory. 11 */ 12 13 /* 14 * TODO: 15 * - main should get parameters from the command line. 16 * - implement all request handlers. 17 * - test for broken requests and virtqueue. 18 * - implement features defined by Virtio 1.0 spec. 19 * - support mergeable buffers and indirect descriptors. 20 * - implement RESET_DEVICE request. 21 * - implement clean shutdown. 22 * - implement non-blocking writes to UDP backend. 23 * - implement polling strategy. 24 */ 25 26 #include <stddef.h> 27 #include <assert.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <stdint.h> 31 #include <inttypes.h> 32 #include <string.h> 33 #include <unistd.h> 34 #include <errno.h> 35 #include <sys/types.h> 36 #include <sys/socket.h> 37 #include <sys/un.h> 38 #include <sys/unistd.h> 39 #include <sys/mman.h> 40 #include <sys/eventfd.h> 41 #include <arpa/inet.h> 42 43 #include <linux/vhost.h> 44 45 #include "qemu/atomic.h" 46 #include "standard-headers/linux/virtio_net.h" 47 #include "standard-headers/linux/virtio_ring.h" 48 49 #define VHOST_USER_BRIDGE_DEBUG 1 50 51 #define DPRINT(...) \ 52 do { \ 53 if (VHOST_USER_BRIDGE_DEBUG) { \ 54 printf(__VA_ARGS__); \ 55 } \ 56 } while (0) 57 58 typedef void (*CallbackFunc)(int sock, void *ctx); 59 60 typedef struct Event { 61 void *ctx; 62 CallbackFunc callback; 63 } Event; 64 65 typedef struct Dispatcher { 66 int max_sock; 67 fd_set fdset; 68 Event events[FD_SETSIZE]; 69 } Dispatcher; 70 71 static void 72 vubr_die(const char *s) 73 { 74 perror(s); 75 exit(1); 76 } 77 78 static int 79 dispatcher_init(Dispatcher *dispr) 80 { 81 FD_ZERO(&dispr->fdset); 82 dispr->max_sock = -1; 83 return 0; 84 } 85 86 static int 87 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb) 88 { 89 if (sock >= FD_SETSIZE) { 90 fprintf(stderr, 91 "Error: Failed to add new event. sock %d should be less than %d\n", 92 sock, FD_SETSIZE); 93 return -1; 94 } 95 96 dispr->events[sock].ctx = ctx; 97 dispr->events[sock].callback = cb; 98 99 FD_SET(sock, &dispr->fdset); 100 if (sock > dispr->max_sock) { 101 dispr->max_sock = sock; 102 } 103 DPRINT("Added sock %d for watching. max_sock: %d\n", 104 sock, dispr->max_sock); 105 return 0; 106 } 107 108 #if 0 109 /* dispatcher_remove() is not currently in use but may be useful 110 * in the future. */ 111 static int 112 dispatcher_remove(Dispatcher *dispr, int sock) 113 { 114 if (sock >= FD_SETSIZE) { 115 fprintf(stderr, 116 "Error: Failed to remove event. sock %d should be less than %d\n", 117 sock, FD_SETSIZE); 118 return -1; 119 } 120 121 FD_CLR(sock, &dispr->fdset); 122 return 0; 123 } 124 #endif 125 126 /* timeout in us */ 127 static int 128 dispatcher_wait(Dispatcher *dispr, uint32_t timeout) 129 { 130 struct timeval tv; 131 tv.tv_sec = timeout / 1000000; 132 tv.tv_usec = timeout % 1000000; 133 134 fd_set fdset = dispr->fdset; 135 136 /* wait until some of sockets become readable. */ 137 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv); 138 139 if (rc == -1) { 140 vubr_die("select"); 141 } 142 143 /* Timeout */ 144 if (rc == 0) { 145 return 0; 146 } 147 148 /* Now call callback for every ready socket. */ 149 150 int sock; 151 for (sock = 0; sock < dispr->max_sock + 1; sock++) 152 if (FD_ISSET(sock, &fdset)) { 153 Event *e = &dispr->events[sock]; 154 e->callback(sock, e->ctx); 155 } 156 157 return 0; 158 } 159 160 typedef struct VubrVirtq { 161 int call_fd; 162 int kick_fd; 163 uint32_t size; 164 uint16_t last_avail_index; 165 uint16_t last_used_index; 166 struct vring_desc *desc; 167 struct vring_avail *avail; 168 struct vring_used *used; 169 } VubrVirtq; 170 171 /* Based on qemu/hw/virtio/vhost-user.c */ 172 173 #define VHOST_MEMORY_MAX_NREGIONS 8 174 #define VHOST_USER_F_PROTOCOL_FEATURES 30 175 176 enum VhostUserProtocolFeature { 177 VHOST_USER_PROTOCOL_F_MQ = 0, 178 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 179 VHOST_USER_PROTOCOL_F_RARP = 2, 180 181 VHOST_USER_PROTOCOL_F_MAX 182 }; 183 184 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 185 186 typedef enum VhostUserRequest { 187 VHOST_USER_NONE = 0, 188 VHOST_USER_GET_FEATURES = 1, 189 VHOST_USER_SET_FEATURES = 2, 190 VHOST_USER_SET_OWNER = 3, 191 VHOST_USER_RESET_OWNER = 4, 192 VHOST_USER_SET_MEM_TABLE = 5, 193 VHOST_USER_SET_LOG_BASE = 6, 194 VHOST_USER_SET_LOG_FD = 7, 195 VHOST_USER_SET_VRING_NUM = 8, 196 VHOST_USER_SET_VRING_ADDR = 9, 197 VHOST_USER_SET_VRING_BASE = 10, 198 VHOST_USER_GET_VRING_BASE = 11, 199 VHOST_USER_SET_VRING_KICK = 12, 200 VHOST_USER_SET_VRING_CALL = 13, 201 VHOST_USER_SET_VRING_ERR = 14, 202 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 203 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 204 VHOST_USER_GET_QUEUE_NUM = 17, 205 VHOST_USER_SET_VRING_ENABLE = 18, 206 VHOST_USER_SEND_RARP = 19, 207 VHOST_USER_MAX 208 } VhostUserRequest; 209 210 typedef struct VhostUserMemoryRegion { 211 uint64_t guest_phys_addr; 212 uint64_t memory_size; 213 uint64_t userspace_addr; 214 uint64_t mmap_offset; 215 } VhostUserMemoryRegion; 216 217 typedef struct VhostUserMemory { 218 uint32_t nregions; 219 uint32_t padding; 220 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 221 } VhostUserMemory; 222 223 typedef struct VhostUserMsg { 224 VhostUserRequest request; 225 226 #define VHOST_USER_VERSION_MASK (0x3) 227 #define VHOST_USER_REPLY_MASK (0x1<<2) 228 uint32_t flags; 229 uint32_t size; /* the following payload size */ 230 union { 231 #define VHOST_USER_VRING_IDX_MASK (0xff) 232 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) 233 uint64_t u64; 234 struct vhost_vring_state state; 235 struct vhost_vring_addr addr; 236 VhostUserMemory memory; 237 } payload; 238 int fds[VHOST_MEMORY_MAX_NREGIONS]; 239 int fd_num; 240 } QEMU_PACKED VhostUserMsg; 241 242 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) 243 244 /* The version of the protocol we support */ 245 #define VHOST_USER_VERSION (0x1) 246 247 #define MAX_NR_VIRTQUEUE (8) 248 249 typedef struct VubrDevRegion { 250 /* Guest Physical address. */ 251 uint64_t gpa; 252 /* Memory region size. */ 253 uint64_t size; 254 /* QEMU virtual address (userspace). */ 255 uint64_t qva; 256 /* Starting offset in our mmaped space. */ 257 uint64_t mmap_offset; 258 /* Start address of mmaped space. */ 259 uint64_t mmap_addr; 260 } VubrDevRegion; 261 262 typedef struct VubrDev { 263 int sock; 264 Dispatcher dispatcher; 265 uint32_t nregions; 266 VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; 267 VubrVirtq vq[MAX_NR_VIRTQUEUE]; 268 int backend_udp_sock; 269 struct sockaddr_in backend_udp_dest; 270 } VubrDev; 271 272 static const char *vubr_request_str[] = { 273 [VHOST_USER_NONE] = "VHOST_USER_NONE", 274 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", 275 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", 276 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", 277 [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", 278 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", 279 [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", 280 [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", 281 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", 282 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", 283 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", 284 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", 285 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", 286 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", 287 [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", 288 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", 289 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", 290 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", 291 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", 292 [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", 293 [VHOST_USER_MAX] = "VHOST_USER_MAX", 294 }; 295 296 static void 297 print_buffer(uint8_t *buf, size_t len) 298 { 299 int i; 300 printf("Raw buffer:\n"); 301 for (i = 0; i < len; i++) { 302 if (i % 16 == 0) { 303 printf("\n"); 304 } 305 if (i % 4 == 0) { 306 printf(" "); 307 } 308 printf("%02x ", buf[i]); 309 } 310 printf("\n............................................................\n"); 311 } 312 313 /* Translate guest physical address to our virtual address. */ 314 static uint64_t 315 gpa_to_va(VubrDev *dev, uint64_t guest_addr) 316 { 317 int i; 318 319 /* Find matching memory region. */ 320 for (i = 0; i < dev->nregions; i++) { 321 VubrDevRegion *r = &dev->regions[i]; 322 323 if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { 324 return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; 325 } 326 } 327 328 assert(!"address not found in regions"); 329 return 0; 330 } 331 332 /* Translate qemu virtual address to our virtual address. */ 333 static uint64_t 334 qva_to_va(VubrDev *dev, uint64_t qemu_addr) 335 { 336 int i; 337 338 /* Find matching memory region. */ 339 for (i = 0; i < dev->nregions; i++) { 340 VubrDevRegion *r = &dev->regions[i]; 341 342 if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) { 343 return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset; 344 } 345 } 346 347 assert(!"address not found in regions"); 348 return 0; 349 } 350 351 static void 352 vubr_message_read(int conn_fd, VhostUserMsg *vmsg) 353 { 354 char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; 355 struct iovec iov = { 356 .iov_base = (char *)vmsg, 357 .iov_len = VHOST_USER_HDR_SIZE, 358 }; 359 struct msghdr msg = { 360 .msg_iov = &iov, 361 .msg_iovlen = 1, 362 .msg_control = control, 363 .msg_controllen = sizeof(control), 364 }; 365 size_t fd_size; 366 struct cmsghdr *cmsg; 367 int rc; 368 369 rc = recvmsg(conn_fd, &msg, 0); 370 371 if (rc <= 0) { 372 vubr_die("recvmsg"); 373 } 374 375 vmsg->fd_num = 0; 376 for (cmsg = CMSG_FIRSTHDR(&msg); 377 cmsg != NULL; 378 cmsg = CMSG_NXTHDR(&msg, cmsg)) 379 { 380 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { 381 fd_size = cmsg->cmsg_len - CMSG_LEN(0); 382 vmsg->fd_num = fd_size / sizeof(int); 383 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); 384 break; 385 } 386 } 387 388 if (vmsg->size > sizeof(vmsg->payload)) { 389 fprintf(stderr, 390 "Error: too big message request: %d, size: vmsg->size: %u, " 391 "while sizeof(vmsg->payload) = %lu\n", 392 vmsg->request, vmsg->size, sizeof(vmsg->payload)); 393 exit(1); 394 } 395 396 if (vmsg->size) { 397 rc = read(conn_fd, &vmsg->payload, vmsg->size); 398 if (rc <= 0) { 399 vubr_die("recvmsg"); 400 } 401 402 assert(rc == vmsg->size); 403 } 404 } 405 406 static void 407 vubr_message_write(int conn_fd, VhostUserMsg *vmsg) 408 { 409 int rc; 410 411 do { 412 rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size); 413 } while (rc < 0 && errno == EINTR); 414 415 if (rc < 0) { 416 vubr_die("write"); 417 } 418 } 419 420 static void 421 vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len) 422 { 423 int slen = sizeof(struct sockaddr_in); 424 425 if (sendto(dev->backend_udp_sock, buf, len, 0, 426 (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) { 427 vubr_die("sendto()"); 428 } 429 } 430 431 static int 432 vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) 433 { 434 int slen = sizeof(struct sockaddr_in); 435 int rc; 436 437 rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0, 438 (struct sockaddr *) &dev->backend_udp_dest, 439 (socklen_t *)&slen); 440 if (rc == -1) { 441 vubr_die("recvfrom()"); 442 } 443 444 return rc; 445 } 446 447 static void 448 vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) 449 { 450 int hdrlen = sizeof(struct virtio_net_hdr_v1); 451 452 if (VHOST_USER_BRIDGE_DEBUG) { 453 print_buffer(buf, len); 454 } 455 vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen); 456 } 457 458 /* Kick the guest if necessary. */ 459 static void 460 vubr_virtqueue_kick(VubrVirtq *vq) 461 { 462 if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 463 DPRINT("Kicking the guest...\n"); 464 eventfd_write(vq->call_fd, 1); 465 } 466 } 467 468 static void 469 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) 470 { 471 struct vring_desc *desc = vq->desc; 472 struct vring_avail *avail = vq->avail; 473 struct vring_used *used = vq->used; 474 475 unsigned int size = vq->size; 476 477 uint16_t avail_index = atomic_mb_read(&avail->idx); 478 479 /* We check the available descriptors before posting the 480 * buffer, so here we assume that enough available 481 * descriptors. */ 482 assert(vq->last_avail_index != avail_index); 483 uint16_t a_index = vq->last_avail_index % size; 484 uint16_t u_index = vq->last_used_index % size; 485 uint16_t d_index = avail->ring[a_index]; 486 487 int i = d_index; 488 489 DPRINT("Post packet to guest on vq:\n"); 490 DPRINT(" size = %d\n", vq->size); 491 DPRINT(" last_avail_index = %d\n", vq->last_avail_index); 492 DPRINT(" last_used_index = %d\n", vq->last_used_index); 493 DPRINT(" a_index = %d\n", a_index); 494 DPRINT(" u_index = %d\n", u_index); 495 DPRINT(" d_index = %d\n", d_index); 496 DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); 497 DPRINT(" desc[%d].len = %d\n", i, desc[i].len); 498 DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); 499 DPRINT(" avail->idx = %d\n", avail_index); 500 DPRINT(" used->idx = %d\n", used->idx); 501 502 if (!(desc[i].flags & VRING_DESC_F_WRITE)) { 503 /* FIXME: we should find writable descriptor. */ 504 fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); 505 exit(1); 506 } 507 508 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); 509 uint32_t chunk_len = desc[i].len; 510 511 if (len <= chunk_len) { 512 memcpy(chunk_start, buf, len); 513 } else { 514 fprintf(stderr, 515 "Received too long packet from the backend. Dropping...\n"); 516 return; 517 } 518 519 /* Add descriptor to the used ring. */ 520 used->ring[u_index].id = d_index; 521 used->ring[u_index].len = len; 522 523 vq->last_avail_index++; 524 vq->last_used_index++; 525 526 atomic_mb_set(&used->idx, vq->last_used_index); 527 528 /* Kick the guest if necessary. */ 529 vubr_virtqueue_kick(vq); 530 } 531 532 static int 533 vubr_process_desc(VubrDev *dev, VubrVirtq *vq) 534 { 535 struct vring_desc *desc = vq->desc; 536 struct vring_avail *avail = vq->avail; 537 struct vring_used *used = vq->used; 538 539 unsigned int size = vq->size; 540 541 uint16_t a_index = vq->last_avail_index % size; 542 uint16_t u_index = vq->last_used_index % size; 543 uint16_t d_index = avail->ring[a_index]; 544 545 uint32_t i, len = 0; 546 size_t buf_size = 4096; 547 uint8_t buf[4096]; 548 549 DPRINT("Chunks: "); 550 i = d_index; 551 do { 552 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); 553 uint32_t chunk_len = desc[i].len; 554 555 if (len + chunk_len < buf_size) { 556 memcpy(buf + len, chunk_start, chunk_len); 557 DPRINT("%d ", chunk_len); 558 } else { 559 fprintf(stderr, "Error: too long packet. Dropping...\n"); 560 break; 561 } 562 563 len += chunk_len; 564 565 if (!(desc[i].flags & VRING_DESC_F_NEXT)) { 566 break; 567 } 568 569 i = desc[i].next; 570 } while (1); 571 DPRINT("\n"); 572 573 if (!len) { 574 return -1; 575 } 576 577 /* Add descriptor to the used ring. */ 578 used->ring[u_index].id = d_index; 579 used->ring[u_index].len = len; 580 581 vubr_consume_raw_packet(dev, buf, len); 582 583 return 0; 584 } 585 586 static void 587 vubr_process_avail(VubrDev *dev, VubrVirtq *vq) 588 { 589 struct vring_avail *avail = vq->avail; 590 struct vring_used *used = vq->used; 591 592 while (vq->last_avail_index != atomic_mb_read(&avail->idx)) { 593 vubr_process_desc(dev, vq); 594 vq->last_avail_index++; 595 vq->last_used_index++; 596 } 597 598 atomic_mb_set(&used->idx, vq->last_used_index); 599 } 600 601 static void 602 vubr_backend_recv_cb(int sock, void *ctx) 603 { 604 VubrDev *dev = (VubrDev *) ctx; 605 VubrVirtq *rx_vq = &dev->vq[0]; 606 uint8_t buf[4096]; 607 struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; 608 int hdrlen = sizeof(struct virtio_net_hdr_v1); 609 int buflen = sizeof(buf); 610 int len; 611 612 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); 613 614 uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); 615 616 /* If there is no available descriptors, just do nothing. 617 * The buffer will be handled by next arrived UDP packet, 618 * or next kick on receive virtq. */ 619 if (rx_vq->last_avail_index == avail_index) { 620 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n"); 621 return; 622 } 623 624 len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); 625 626 *hdr = (struct virtio_net_hdr_v1) { }; 627 hdr->num_buffers = 1; 628 vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); 629 } 630 631 static void 632 vubr_kick_cb(int sock, void *ctx) 633 { 634 VubrDev *dev = (VubrDev *) ctx; 635 eventfd_t kick_data; 636 ssize_t rc; 637 638 rc = eventfd_read(sock, &kick_data); 639 if (rc == -1) { 640 vubr_die("eventfd_read()"); 641 } else { 642 DPRINT("Got kick_data: %016"PRIx64"\n", kick_data); 643 vubr_process_avail(dev, &dev->vq[1]); 644 } 645 } 646 647 static int 648 vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg) 649 { 650 DPRINT("Function %s() not implemented yet.\n", __func__); 651 return 0; 652 } 653 654 static int 655 vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg) 656 { 657 vmsg->payload.u64 = 658 ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | 659 (1ULL << VIRTIO_NET_F_CTRL_VQ) | 660 (1ULL << VIRTIO_NET_F_CTRL_RX) | 661 (1ULL << VHOST_F_LOG_ALL)); 662 vmsg->size = sizeof(vmsg->payload.u64); 663 664 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 665 666 /* reply */ 667 return 1; 668 } 669 670 static int 671 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) 672 { 673 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 674 return 0; 675 } 676 677 static int 678 vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg) 679 { 680 return 0; 681 } 682 683 static int 684 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg) 685 { 686 DPRINT("Function %s() not implemented yet.\n", __func__); 687 return 0; 688 } 689 690 static int 691 vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg) 692 { 693 int i; 694 VhostUserMemory *memory = &vmsg->payload.memory; 695 dev->nregions = memory->nregions; 696 697 DPRINT("Nregions: %d\n", memory->nregions); 698 for (i = 0; i < dev->nregions; i++) { 699 void *mmap_addr; 700 VhostUserMemoryRegion *msg_region = &memory->regions[i]; 701 VubrDevRegion *dev_region = &dev->regions[i]; 702 703 DPRINT("Region %d\n", i); 704 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", 705 msg_region->guest_phys_addr); 706 DPRINT(" memory_size: 0x%016"PRIx64"\n", 707 msg_region->memory_size); 708 DPRINT(" userspace_addr 0x%016"PRIx64"\n", 709 msg_region->userspace_addr); 710 DPRINT(" mmap_offset 0x%016"PRIx64"\n", 711 msg_region->mmap_offset); 712 713 dev_region->gpa = msg_region->guest_phys_addr; 714 dev_region->size = msg_region->memory_size; 715 dev_region->qva = msg_region->userspace_addr; 716 dev_region->mmap_offset = msg_region->mmap_offset; 717 718 /* We don't use offset argument of mmap() since the 719 * mapped address has to be page aligned, and we use huge 720 * pages. */ 721 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, 722 PROT_READ | PROT_WRITE, MAP_SHARED, 723 vmsg->fds[i], 0); 724 725 if (mmap_addr == MAP_FAILED) { 726 vubr_die("mmap"); 727 } 728 729 dev_region->mmap_addr = (uint64_t) mmap_addr; 730 DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr); 731 } 732 733 return 0; 734 } 735 736 static int 737 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg) 738 { 739 DPRINT("Function %s() not implemented yet.\n", __func__); 740 return 0; 741 } 742 743 static int 744 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg) 745 { 746 DPRINT("Function %s() not implemented yet.\n", __func__); 747 return 0; 748 } 749 750 static int 751 vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg) 752 { 753 unsigned int index = vmsg->payload.state.index; 754 unsigned int num = vmsg->payload.state.num; 755 756 DPRINT("State.index: %d\n", index); 757 DPRINT("State.num: %d\n", num); 758 dev->vq[index].size = num; 759 return 0; 760 } 761 762 static int 763 vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg) 764 { 765 struct vhost_vring_addr *vra = &vmsg->payload.addr; 766 unsigned int index = vra->index; 767 VubrVirtq *vq = &dev->vq[index]; 768 769 DPRINT("vhost_vring_addr:\n"); 770 DPRINT(" index: %d\n", vra->index); 771 DPRINT(" flags: %d\n", vra->flags); 772 DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr); 773 DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr); 774 DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr); 775 DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr); 776 777 vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr); 778 vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr); 779 vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr); 780 781 DPRINT("Setting virtq addresses:\n"); 782 DPRINT(" vring_desc at %p\n", vq->desc); 783 DPRINT(" vring_used at %p\n", vq->used); 784 DPRINT(" vring_avail at %p\n", vq->avail); 785 786 vq->last_used_index = vq->used->idx; 787 return 0; 788 } 789 790 static int 791 vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) 792 { 793 unsigned int index = vmsg->payload.state.index; 794 unsigned int num = vmsg->payload.state.num; 795 796 DPRINT("State.index: %d\n", index); 797 DPRINT("State.num: %d\n", num); 798 dev->vq[index].last_avail_index = num; 799 800 return 0; 801 } 802 803 static int 804 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg) 805 { 806 DPRINT("Function %s() not implemented yet.\n", __func__); 807 return 0; 808 } 809 810 static int 811 vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg) 812 { 813 uint64_t u64_arg = vmsg->payload.u64; 814 int index = u64_arg & VHOST_USER_VRING_IDX_MASK; 815 816 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 817 818 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); 819 assert(vmsg->fd_num == 1); 820 821 dev->vq[index].kick_fd = vmsg->fds[0]; 822 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index); 823 824 if (index % 2 == 1) { 825 /* TX queue. */ 826 dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd, 827 dev, vubr_kick_cb); 828 829 DPRINT("Waiting for kicks on fd: %d for vq: %d\n", 830 dev->vq[index].kick_fd, index); 831 } 832 return 0; 833 } 834 835 static int 836 vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg) 837 { 838 uint64_t u64_arg = vmsg->payload.u64; 839 int index = u64_arg & VHOST_USER_VRING_IDX_MASK; 840 841 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 842 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0); 843 assert(vmsg->fd_num == 1); 844 845 dev->vq[index].call_fd = vmsg->fds[0]; 846 DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index); 847 848 return 0; 849 } 850 851 static int 852 vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg) 853 { 854 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 855 return 0; 856 } 857 858 static int 859 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) 860 { 861 /* FIXME: unimplented */ 862 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 863 return 0; 864 } 865 866 static int 867 vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg) 868 { 869 /* FIXME: unimplented */ 870 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 871 return 0; 872 } 873 874 static int 875 vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg) 876 { 877 DPRINT("Function %s() not implemented yet.\n", __func__); 878 return 0; 879 } 880 881 static int 882 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg) 883 { 884 DPRINT("Function %s() not implemented yet.\n", __func__); 885 return 0; 886 } 887 888 static int 889 vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg) 890 { 891 DPRINT("Function %s() not implemented yet.\n", __func__); 892 return 0; 893 } 894 895 static int 896 vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg) 897 { 898 /* Print out generic part of the request. */ 899 DPRINT( 900 "================== Vhost user message from QEMU ==================\n"); 901 DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request], 902 vmsg->request); 903 DPRINT("Flags: 0x%x\n", vmsg->flags); 904 DPRINT("Size: %d\n", vmsg->size); 905 906 if (vmsg->fd_num) { 907 int i; 908 DPRINT("Fds:"); 909 for (i = 0; i < vmsg->fd_num; i++) { 910 DPRINT(" %d", vmsg->fds[i]); 911 } 912 DPRINT("\n"); 913 } 914 915 switch (vmsg->request) { 916 case VHOST_USER_NONE: 917 return vubr_none_exec(dev, vmsg); 918 case VHOST_USER_GET_FEATURES: 919 return vubr_get_features_exec(dev, vmsg); 920 case VHOST_USER_SET_FEATURES: 921 return vubr_set_features_exec(dev, vmsg); 922 case VHOST_USER_SET_OWNER: 923 return vubr_set_owner_exec(dev, vmsg); 924 case VHOST_USER_RESET_OWNER: 925 return vubr_reset_device_exec(dev, vmsg); 926 case VHOST_USER_SET_MEM_TABLE: 927 return vubr_set_mem_table_exec(dev, vmsg); 928 case VHOST_USER_SET_LOG_BASE: 929 return vubr_set_log_base_exec(dev, vmsg); 930 case VHOST_USER_SET_LOG_FD: 931 return vubr_set_log_fd_exec(dev, vmsg); 932 case VHOST_USER_SET_VRING_NUM: 933 return vubr_set_vring_num_exec(dev, vmsg); 934 case VHOST_USER_SET_VRING_ADDR: 935 return vubr_set_vring_addr_exec(dev, vmsg); 936 case VHOST_USER_SET_VRING_BASE: 937 return vubr_set_vring_base_exec(dev, vmsg); 938 case VHOST_USER_GET_VRING_BASE: 939 return vubr_get_vring_base_exec(dev, vmsg); 940 case VHOST_USER_SET_VRING_KICK: 941 return vubr_set_vring_kick_exec(dev, vmsg); 942 case VHOST_USER_SET_VRING_CALL: 943 return vubr_set_vring_call_exec(dev, vmsg); 944 case VHOST_USER_SET_VRING_ERR: 945 return vubr_set_vring_err_exec(dev, vmsg); 946 case VHOST_USER_GET_PROTOCOL_FEATURES: 947 return vubr_get_protocol_features_exec(dev, vmsg); 948 case VHOST_USER_SET_PROTOCOL_FEATURES: 949 return vubr_set_protocol_features_exec(dev, vmsg); 950 case VHOST_USER_GET_QUEUE_NUM: 951 return vubr_get_queue_num_exec(dev, vmsg); 952 case VHOST_USER_SET_VRING_ENABLE: 953 return vubr_set_vring_enable_exec(dev, vmsg); 954 case VHOST_USER_SEND_RARP: 955 return vubr_send_rarp_exec(dev, vmsg); 956 957 case VHOST_USER_MAX: 958 assert(vmsg->request != VHOST_USER_MAX); 959 } 960 return 0; 961 } 962 963 static void 964 vubr_receive_cb(int sock, void *ctx) 965 { 966 VubrDev *dev = (VubrDev *) ctx; 967 VhostUserMsg vmsg; 968 int reply_requested; 969 970 vubr_message_read(sock, &vmsg); 971 reply_requested = vubr_execute_request(dev, &vmsg); 972 if (reply_requested) { 973 /* Set the version in the flags when sending the reply */ 974 vmsg.flags &= ~VHOST_USER_VERSION_MASK; 975 vmsg.flags |= VHOST_USER_VERSION; 976 vmsg.flags |= VHOST_USER_REPLY_MASK; 977 vubr_message_write(sock, &vmsg); 978 } 979 } 980 981 static void 982 vubr_accept_cb(int sock, void *ctx) 983 { 984 VubrDev *dev = (VubrDev *)ctx; 985 int conn_fd; 986 struct sockaddr_un un; 987 socklen_t len = sizeof(un); 988 989 conn_fd = accept(sock, (struct sockaddr *) &un, &len); 990 if (conn_fd == -1) { 991 vubr_die("accept()"); 992 } 993 DPRINT("Got connection from remote peer on sock %d\n", conn_fd); 994 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); 995 } 996 997 static VubrDev * 998 vubr_new(const char *path) 999 { 1000 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev)); 1001 dev->nregions = 0; 1002 int i; 1003 struct sockaddr_un un; 1004 size_t len; 1005 1006 for (i = 0; i < MAX_NR_VIRTQUEUE; i++) { 1007 dev->vq[i] = (VubrVirtq) { 1008 .call_fd = -1, .kick_fd = -1, 1009 .size = 0, 1010 .last_avail_index = 0, .last_used_index = 0, 1011 .desc = 0, .avail = 0, .used = 0, 1012 }; 1013 } 1014 1015 /* Get a UNIX socket. */ 1016 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0); 1017 if (dev->sock == -1) { 1018 vubr_die("socket"); 1019 } 1020 1021 un.sun_family = AF_UNIX; 1022 strcpy(un.sun_path, path); 1023 len = sizeof(un.sun_family) + strlen(path); 1024 unlink(path); 1025 1026 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) { 1027 vubr_die("bind"); 1028 } 1029 1030 if (listen(dev->sock, 1) == -1) { 1031 vubr_die("listen"); 1032 } 1033 1034 dispatcher_init(&dev->dispatcher); 1035 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, 1036 vubr_accept_cb); 1037 1038 DPRINT("Waiting for connections on UNIX socket %s ...\n", path); 1039 return dev; 1040 } 1041 1042 static void 1043 vubr_backend_udp_setup(VubrDev *dev, 1044 const char *local_host, 1045 uint16_t local_port, 1046 const char *dest_host, 1047 uint16_t dest_port) 1048 { 1049 int sock; 1050 struct sockaddr_in si_local = { 1051 .sin_family = AF_INET, 1052 .sin_port = htons(local_port), 1053 }; 1054 1055 if (inet_aton(local_host, &si_local.sin_addr) == 0) { 1056 fprintf(stderr, "inet_aton() failed.\n"); 1057 exit(1); 1058 } 1059 1060 /* setup destination for sends */ 1061 dev->backend_udp_dest = (struct sockaddr_in) { 1062 .sin_family = AF_INET, 1063 .sin_port = htons(dest_port), 1064 }; 1065 if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) { 1066 fprintf(stderr, "inet_aton() failed.\n"); 1067 exit(1); 1068 } 1069 1070 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); 1071 if (sock == -1) { 1072 vubr_die("socket"); 1073 } 1074 1075 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) { 1076 vubr_die("bind"); 1077 } 1078 1079 dev->backend_udp_sock = sock; 1080 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb); 1081 DPRINT("Waiting for data from udp backend on %s:%d...\n", 1082 local_host, local_port); 1083 } 1084 1085 static void 1086 vubr_run(VubrDev *dev) 1087 { 1088 while (1) { 1089 /* timeout 200ms */ 1090 dispatcher_wait(&dev->dispatcher, 200000); 1091 /* Here one can try polling strategy. */ 1092 } 1093 } 1094 1095 int 1096 main(int argc, char *argv[]) 1097 { 1098 VubrDev *dev; 1099 1100 dev = vubr_new("/tmp/vubr.sock"); 1101 if (!dev) { 1102 return 1; 1103 } 1104 1105 vubr_backend_udp_setup(dev, 1106 "127.0.0.1", 4444, 1107 "127.0.0.1", 5555); 1108 vubr_run(dev); 1109 return 0; 1110 } 1111