xref: /openbmc/qemu/tests/vhost-user-bridge.c (revision 61b9251a)
1 /*
2  * Vhost User Bridge
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * Authors:
7  *  Victor Kaplansky <victork@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 /*
14  * TODO:
15  *     - main should get parameters from the command line.
16  *     - implement all request handlers.
17  *     - test for broken requests and virtqueue.
18  *     - implement features defined by Virtio 1.0 spec.
19  *     - support mergeable buffers and indirect descriptors.
20  *     - implement RESET_DEVICE request.
21  *     - implement clean shutdown.
22  *     - implement non-blocking writes to UDP backend.
23  *     - implement polling strategy.
24  */
25 
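/*
 * Usage sketch (not exercised by this file itself): the bridge listens on
 * /tmp/vubr.sock and bridges the guest's virtio-net traffic to a local UDP
 * socket (see main() below for the hard-coded addresses).  A QEMU guest can
 * be attached roughly along these lines -- option spellings may differ
 * between QEMU versions, and the hugepage mem-path is only an example:
 *
 *   qemu-system-x86_64 -m 512 \
 *     -object memory-backend-file,id=mem,size=512M,mem-path=/dev/hugepages,share=on \
 *     -numa node,memdev=mem \
 *     -chardev socket,id=char0,path=/tmp/vubr.sock \
 *     -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \
 *     -device virtio-net-pci,netdev=mynet1
 */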
26 #include <stddef.h>
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <inttypes.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <errno.h>
35 #include <sys/types.h>
36 #include <sys/socket.h>
37 #include <sys/un.h>
38 #include <sys/unistd.h>
39 #include <sys/mman.h>
40 #include <sys/eventfd.h>
41 #include <arpa/inet.h>
42 
43 #include <linux/vhost.h>
44 
45 #include "qemu/atomic.h"
46 #include "standard-headers/linux/virtio_net.h"
47 #include "standard-headers/linux/virtio_ring.h"
48 
49 #define VHOST_USER_BRIDGE_DEBUG 1
50 
51 #define DPRINT(...) \
52     do { \
53         if (VHOST_USER_BRIDGE_DEBUG) { \
54             printf(__VA_ARGS__); \
55         } \
56     } while (0)
57 
58 typedef void (*CallbackFunc)(int sock, void *ctx);
59 
60 typedef struct Event {
61     void *ctx;
62     CallbackFunc callback;
63 } Event;
64 
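/*
 * A minimal select()-based event loop: each file descriptor registered
 * with dispatcher_add() gets a callback that dispatcher_wait() invokes
 * whenever the descriptor becomes readable.
 */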
65 typedef struct Dispatcher {
66     int max_sock;
67     fd_set fdset;
68     Event events[FD_SETSIZE];
69 } Dispatcher;
70 
71 static void
72 vubr_die(const char *s)
73 {
74     perror(s);
75     exit(1);
76 }
77 
78 static int
79 dispatcher_init(Dispatcher *dispr)
80 {
81     FD_ZERO(&dispr->fdset);
82     dispr->max_sock = -1;
83     return 0;
84 }
85 
86 static int
87 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
88 {
89     if (sock >= FD_SETSIZE) {
90         fprintf(stderr,
91                 "Error: Failed to add new event. sock %d should be less than %d\n",
92                 sock, FD_SETSIZE);
93         return -1;
94     }
95 
96     dispr->events[sock].ctx = ctx;
97     dispr->events[sock].callback = cb;
98 
99     FD_SET(sock, &dispr->fdset);
100     if (sock > dispr->max_sock) {
101         dispr->max_sock = sock;
102     }
103     DPRINT("Added sock %d for watching. max_sock: %d\n",
104            sock, dispr->max_sock);
105     return 0;
106 }
107 
108 #if 0
109 /* dispatcher_remove() is not currently in use but may be useful
110  * in the future. */
111 static int
112 dispatcher_remove(Dispatcher *dispr, int sock)
113 {
114     if (sock >= FD_SETSIZE) {
115         fprintf(stderr,
116                 "Error: Failed to remove event. sock %d should be less than %d\n",
117                 sock, FD_SETSIZE);
118         return -1;
119     }
120 
121     FD_CLR(sock, &dispr->fdset);
122     return 0;
123 }
124 #endif
125 
126 /* timeout in us */
127 static int
128 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
129 {
130     struct timeval tv;
131     tv.tv_sec = timeout / 1000000;
132     tv.tv_usec = timeout % 1000000;
133 
134     fd_set fdset = dispr->fdset;
135 
136     /* Wait until some of the sockets become readable. */
137     int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
138 
139     if (rc == -1) {
140         vubr_die("select");
141     }
142 
143     /* Timeout */
144     if (rc == 0) {
145         return 0;
146     }
147 
148     /* Now call the callback for every ready socket. */
149 
150     int sock;
151     for (sock = 0; sock < dispr->max_sock + 1; sock++) {
152         if (FD_ISSET(sock, &fdset)) {
153             Event *e = &dispr->events[sock];
154             e->callback(sock, e->ctx);
155         }
156     }
157     return 0;
158 }
159 
160 typedef struct VubrVirtq {
161     int call_fd;
162     int kick_fd;
163     uint32_t size;
164     uint16_t last_avail_index;
165     uint16_t last_used_index;
166     struct vring_desc *desc;
167     struct vring_avail *avail;
168     struct vring_used *used;
169 } VubrVirtq;
170 
171 /* Based on qemu/hw/virtio/vhost-user.c */
172 
173 #define VHOST_MEMORY_MAX_NREGIONS    8
174 #define VHOST_USER_F_PROTOCOL_FEATURES 30
175 
176 enum VhostUserProtocolFeature {
177     VHOST_USER_PROTOCOL_F_MQ = 0,
178     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
179     VHOST_USER_PROTOCOL_F_RARP = 2,
180 
181     VHOST_USER_PROTOCOL_F_MAX
182 };
183 
184 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
185 
186 typedef enum VhostUserRequest {
187     VHOST_USER_NONE = 0,
188     VHOST_USER_GET_FEATURES = 1,
189     VHOST_USER_SET_FEATURES = 2,
190     VHOST_USER_SET_OWNER = 3,
191     VHOST_USER_RESET_OWNER = 4,
192     VHOST_USER_SET_MEM_TABLE = 5,
193     VHOST_USER_SET_LOG_BASE = 6,
194     VHOST_USER_SET_LOG_FD = 7,
195     VHOST_USER_SET_VRING_NUM = 8,
196     VHOST_USER_SET_VRING_ADDR = 9,
197     VHOST_USER_SET_VRING_BASE = 10,
198     VHOST_USER_GET_VRING_BASE = 11,
199     VHOST_USER_SET_VRING_KICK = 12,
200     VHOST_USER_SET_VRING_CALL = 13,
201     VHOST_USER_SET_VRING_ERR = 14,
202     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
203     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
204     VHOST_USER_GET_QUEUE_NUM = 17,
205     VHOST_USER_SET_VRING_ENABLE = 18,
206     VHOST_USER_SEND_RARP = 19,
207     VHOST_USER_MAX
208 } VhostUserRequest;
209 
210 typedef struct VhostUserMemoryRegion {
211     uint64_t guest_phys_addr;
212     uint64_t memory_size;
213     uint64_t userspace_addr;
214     uint64_t mmap_offset;
215 } VhostUserMemoryRegion;
216 
217 typedef struct VhostUserMemory {
218     uint32_t nregions;
219     uint32_t padding;
220     VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
221 } VhostUserMemory;
222 
223 typedef struct VhostUserMsg {
224     VhostUserRequest request;
225 
226 #define VHOST_USER_VERSION_MASK     (0x3)
227 #define VHOST_USER_REPLY_MASK       (0x1<<2)
228     uint32_t flags;
229     uint32_t size; /* the following payload size */
230     union {
231 #define VHOST_USER_VRING_IDX_MASK   (0xff)
232 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
233         uint64_t u64;
234         struct vhost_vring_state state;
235         struct vhost_vring_addr addr;
236         VhostUserMemory memory;
237     } payload;
238     int fds[VHOST_MEMORY_MAX_NREGIONS];
239     int fd_num;
240 } QEMU_PACKED VhostUserMsg;
241 
242 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
243 
244 /* The version of the protocol we support */
245 #define VHOST_USER_VERSION    (0x1)
246 
247 #define MAX_NR_VIRTQUEUE (8)
248 
249 typedef struct VubrDevRegion {
250     /* Guest Physical address. */
251     uint64_t gpa;
252     /* Memory region size. */
253     uint64_t size;
254     /* QEMU virtual address (userspace). */
255     uint64_t qva;
256     /* Starting offset in our mmapped space. */
257     uint64_t mmap_offset;
258     /* Start address of mmapped space. */
259     uint64_t mmap_addr;
260 } VubrDevRegion;
261 
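/*
 * Per-device state.  Following the virtio-net convention, vq[0] is the
 * receive queue (filled with packets arriving from the UDP backend) and
 * vq[1] is the transmit queue (drained towards the UDP backend).
 */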
262 typedef struct VubrDev {
263     int sock;
264     Dispatcher dispatcher;
265     uint32_t nregions;
266     VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
267     VubrVirtq vq[MAX_NR_VIRTQUEUE];
268     int backend_udp_sock;
269     struct sockaddr_in backend_udp_dest;
270 } VubrDev;
271 
272 static const char *vubr_request_str[] = {
273     [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
274     [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
275     [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
276     [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
277     [VHOST_USER_RESET_OWNER]            =  "VHOST_USER_RESET_OWNER",
278     [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
279     [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
280     [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
281     [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
282     [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
283     [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
284     [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
285     [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
286     [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
287     [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
288     [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
289     [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
290     [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
291     [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
292     [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
293     [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
294 };
295 
296 static void
297 print_buffer(uint8_t *buf, size_t len)
298 {
299     int i;
300     printf("Raw buffer:\n");
301     for (i = 0; i < len; i++) {
302         if (i % 16 == 0) {
303             printf("\n");
304         }
305         if (i % 4 == 0) {
306             printf("   ");
307         }
308         printf("%02x ", buf[i]);
309     }
310     printf("\n............................................................\n");
311 }
312 
313 /* Translate guest physical address to our virtual address.  */
314 static uint64_t
315 gpa_to_va(VubrDev *dev, uint64_t guest_addr)
316 {
317     int i;
318 
319     /* Find matching memory region.  */
320     for (i = 0; i < dev->nregions; i++) {
321         VubrDevRegion *r = &dev->regions[i];
322 
323         if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
324             return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
325         }
326     }
327 
328     assert(!"address not found in regions");
329     return 0;
330 }
331 
332 /* Translate qemu virtual address to our virtual address.  */
333 static uint64_t
334 qva_to_va(VubrDev *dev, uint64_t qemu_addr)
335 {
336     int i;
337 
338     /* Find matching memory region.  */
339     for (i = 0; i < dev->nregions; i++) {
340         VubrDevRegion *r = &dev->regions[i];
341 
342         if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
343             return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
344         }
345     }
346 
347     assert(!"address not found in regions");
348     return 0;
349 }
350 
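/*
 * Read a single vhost-user message from the QEMU connection: first the
 * fixed-size header together with any SCM_RIGHTS file descriptors passed
 * as ancillary data, then the variable-size payload.
 */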
351 static void
352 vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
353 {
354     char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
355     struct iovec iov = {
356         .iov_base = (char *)vmsg,
357         .iov_len = VHOST_USER_HDR_SIZE,
358     };
359     struct msghdr msg = {
360         .msg_iov = &iov,
361         .msg_iovlen = 1,
362         .msg_control = control,
363         .msg_controllen = sizeof(control),
364     };
365     size_t fd_size;
366     struct cmsghdr *cmsg;
367     int rc;
368 
369     rc = recvmsg(conn_fd, &msg, 0);
370 
371     if (rc <= 0) {
372         vubr_die("recvmsg");
373     }
374 
375     vmsg->fd_num = 0;
376     for (cmsg = CMSG_FIRSTHDR(&msg);
377          cmsg != NULL;
378          cmsg = CMSG_NXTHDR(&msg, cmsg))
379     {
380         if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
381             fd_size = cmsg->cmsg_len - CMSG_LEN(0);
382             vmsg->fd_num = fd_size / sizeof(int);
383             memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
384             break;
385         }
386     }
387 
388     if (vmsg->size > sizeof(vmsg->payload)) {
389         fprintf(stderr,
390                 "Error: too big message: request %d, vmsg->size: %u, "
391                 "while sizeof(vmsg->payload) = %zu\n",
392                 vmsg->request, vmsg->size, sizeof(vmsg->payload));
393         exit(1);
394     }
395 
396     if (vmsg->size) {
397         rc = read(conn_fd, &vmsg->payload, vmsg->size);
398         if (rc <= 0) {
399             vubr_die("read");
400         }
401 
402         assert(rc == vmsg->size);
403     }
404 }
405 
406 static void
407 vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
408 {
409     int rc;
410 
411     do {
412         rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
413     } while (rc < 0 && errno == EINTR);
414 
415     if (rc < 0) {
416         vubr_die("write");
417     }
418 }
419 
420 static void
421 vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
422 {
423     int slen = sizeof(struct sockaddr_in);
424 
425     if (sendto(dev->backend_udp_sock, buf, len, 0,
426                (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
427         vubr_die("sendto()");
428     }
429 }
430 
431 static int
432 vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
433 {
434     socklen_t slen = sizeof(struct sockaddr_in);
435     int rc;
436 
437     rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
438                   (struct sockaddr *) &dev->backend_udp_dest,
439                   &slen);
440     if (rc == -1) {
441         vubr_die("recvfrom()");
442     }
443 
444     return rc;
445 }
446 
447 static void
448 vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
449 {
450     int hdrlen = sizeof(struct virtio_net_hdr_v1);
451 
452     if (VHOST_USER_BRIDGE_DEBUG) {
453         print_buffer(buf, len);
454     }
455     vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
456 }
457 
458 /* Kick the guest if necessary. */
459 static void
460 vubr_virtqueue_kick(VubrVirtq *vq)
461 {
462     if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
463         DPRINT("Kicking the guest...\n");
464         eventfd_write(vq->call_fd, 1);
465     }
466 }
467 
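/*
 * Copy one packet into the next available (device-writable) descriptor of
 * the given virtqueue and publish it on the used ring, kicking the guest
 * if interrupts are not suppressed.
 */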
468 static void
469 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
470 {
471     struct vring_desc *desc   = vq->desc;
472     struct vring_avail *avail = vq->avail;
473     struct vring_used *used   = vq->used;
474 
475     unsigned int size = vq->size;
476 
477     uint16_t avail_index = atomic_mb_read(&avail->idx);
478 
479     /* We check the available descriptors before posting the
480      * buffer, so here we assume that there are enough available
481      * descriptors. */
482     assert(vq->last_avail_index != avail_index);
483     uint16_t a_index = vq->last_avail_index % size;
484     uint16_t u_index = vq->last_used_index % size;
485     uint16_t d_index = avail->ring[a_index];
486 
487     int i = d_index;
488 
489     DPRINT("Post packet to guest on vq:\n");
490     DPRINT("    size             = %d\n", vq->size);
491     DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
492     DPRINT("    last_used_index  = %d\n", vq->last_used_index);
493     DPRINT("    a_index = %d\n", a_index);
494     DPRINT("    u_index = %d\n", u_index);
495     DPRINT("    d_index = %d\n", d_index);
496     DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
497     DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
498     DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
499     DPRINT("    avail->idx = %d\n", avail_index);
500     DPRINT("    used->idx  = %d\n", used->idx);
501 
502     if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
503         /* FIXME: we should find a writable descriptor. */
504         fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
505         exit(1);
506     }
507 
508     void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
509     uint32_t chunk_len = desc[i].len;
510 
511     if (len <= chunk_len) {
512         memcpy(chunk_start, buf, len);
513     } else {
514         fprintf(stderr,
515                 "Received too long packet from the backend. Dropping...\n");
516         return;
517     }
518 
519     /* Add descriptor to the used ring. */
520     used->ring[u_index].id = d_index;
521     used->ring[u_index].len = len;
522 
523     vq->last_avail_index++;
524     vq->last_used_index++;
525 
526     atomic_mb_set(&used->idx, vq->last_used_index);
527 
528     /* Kick the guest if necessary. */
529     vubr_virtqueue_kick(vq);
530 }
531 
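/*
 * Consume one descriptor chain from the available ring: gather the chain
 * into a local buffer and hand the resulting packet to the UDP backend.
 * The used ring entry is filled in here; the used index itself is
 * published by the caller.
 */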
532 static int
533 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
534 {
535     struct vring_desc *desc   = vq->desc;
536     struct vring_avail *avail = vq->avail;
537     struct vring_used *used   = vq->used;
538 
539     unsigned int size = vq->size;
540 
541     uint16_t a_index = vq->last_avail_index % size;
542     uint16_t u_index = vq->last_used_index % size;
543     uint16_t d_index = avail->ring[a_index];
544 
545     uint32_t i, len = 0;
546     size_t buf_size = 4096;
547     uint8_t buf[4096];
548 
549     DPRINT("Chunks: ");
550     i = d_index;
551     do {
552         void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
553         uint32_t chunk_len = desc[i].len;
554 
555         if (len + chunk_len < buf_size) {
556             memcpy(buf + len, chunk_start, chunk_len);
557             DPRINT("%d ", chunk_len);
558         } else {
559             fprintf(stderr, "Error: packet is too long. Dropping...\n");
560             break;
561         }
562 
563         len += chunk_len;
564 
565         if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
566             break;
567         }
568 
569         i = desc[i].next;
570     } while (1);
571     DPRINT("\n");
572 
573     if (!len) {
574         return -1;
575     }
576 
577     /* Add descriptor to the used ring. */
578     used->ring[u_index].id = d_index;
579     used->ring[u_index].len = len;
580 
581     vubr_consume_raw_packet(dev, buf, len);
582 
583     return 0;
584 }
585 
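/*
 * Drain every descriptor chain the guest has made available on the given
 * virtqueue, then publish the new used index once.  In this bridge it is
 * only used for the transmit queue.
 */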
586 static void
587 vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
588 {
589     struct vring_avail *avail = vq->avail;
590     struct vring_used *used = vq->used;
591 
592     while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
593         vubr_process_desc(dev, vq);
594         vq->last_avail_index++;
595         vq->last_used_index++;
596     }
597 
598     atomic_mb_set(&used->idx, vq->last_used_index);
599 }
600 
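/*
 * Called by the dispatcher when the UDP backend socket becomes readable:
 * prepend a virtio-net header to the received datagram and post it on the
 * guest's receive queue (vq[0]).
 */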
601 static void
602 vubr_backend_recv_cb(int sock, void *ctx)
603 {
604     VubrDev *dev = (VubrDev *) ctx;
605     VubrVirtq *rx_vq = &dev->vq[0];
606     uint8_t buf[4096];
607     struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
608     int hdrlen = sizeof(struct virtio_net_hdr_v1);
609     int buflen = sizeof(buf);
610     int len;
611 
612     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
613 
614     uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
615 
616     /* If there are no available descriptors, just do nothing.
617      * The buffer will be handled by the next arriving UDP packet,
618      * or by the next kick on the receive virtq. */
619     if (rx_vq->last_avail_index == avail_index) {
620         DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
621         return;
622     }
623 
624     len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
625 
626     *hdr = (struct virtio_net_hdr_v1) { };
627     hdr->num_buffers = 1;
628     vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
629 }
630 
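/*
 * Called by the dispatcher when the guest kicks the transmit queue's
 * eventfd: clear the eventfd and process the available TX descriptors.
 */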
631 static void
632 vubr_kick_cb(int sock, void *ctx)
633 {
634     VubrDev *dev = (VubrDev *) ctx;
635     eventfd_t kick_data;
636     ssize_t rc;
637 
638     rc = eventfd_read(sock, &kick_data);
639     if (rc == -1) {
640         vubr_die("eventfd_read()");
641     } else {
642         DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
643         vubr_process_avail(dev, &dev->vq[1]);
644     }
645 }
646 
647 static int
648 vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
649 {
650     DPRINT("Function %s() not implemented yet.\n", __func__);
651     return 0;
652 }
653 
654 static int
655 vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
656 {
657     vmsg->payload.u64 =
658             ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
659              (1ULL << VIRTIO_NET_F_CTRL_VQ) |
660              (1ULL << VIRTIO_NET_F_CTRL_RX) |
661              (1ULL << VHOST_F_LOG_ALL));
662     vmsg->size = sizeof(vmsg->payload.u64);
663 
664     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
665 
666     /* reply */
667     return 1;
668 }
669 
670 static int
671 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
672 {
673     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
674     return 0;
675 }
676 
677 static int
678 vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
679 {
680     return 0;
681 }
682 
683 static int
684 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
685 {
686     DPRINT("Function %s() not implemented yet.\n", __func__);
687     return 0;
688 }
689 
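/*
 * Record the guest memory regions announced by QEMU and mmap() each one
 * through the file descriptor passed with the message, so that guest
 * physical and QEMU virtual addresses can be translated locally.
 */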
690 static int
691 vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
692 {
693     int i;
694     VhostUserMemory *memory = &vmsg->payload.memory;
695     dev->nregions = memory->nregions;
696 
697     DPRINT("Nregions: %d\n", memory->nregions);
698     for (i = 0; i < dev->nregions; i++) {
699         void *mmap_addr;
700         VhostUserMemoryRegion *msg_region = &memory->regions[i];
701         VubrDevRegion *dev_region = &dev->regions[i];
702 
703         DPRINT("Region %d\n", i);
704         DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
705                msg_region->guest_phys_addr);
706         DPRINT("    memory_size:     0x%016"PRIx64"\n",
707                msg_region->memory_size);
708         DPRINT("    userspace_addr   0x%016"PRIx64"\n",
709                msg_region->userspace_addr);
710         DPRINT("    mmap_offset      0x%016"PRIx64"\n",
711                msg_region->mmap_offset);
712 
713         dev_region->gpa         = msg_region->guest_phys_addr;
714         dev_region->size        = msg_region->memory_size;
715         dev_region->qva         = msg_region->userspace_addr;
716         dev_region->mmap_offset = msg_region->mmap_offset;
717 
718         /* We don't use the offset argument of mmap() since the
719          * mapped address has to be page aligned, and we use huge
720          * pages.  */
721         mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
722                          PROT_READ | PROT_WRITE, MAP_SHARED,
723                          vmsg->fds[i], 0);
724 
725         if (mmap_addr == MAP_FAILED) {
726             vubr_die("mmap");
727         }
728 
729         dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
730         DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
731     }
732 
733     return 0;
734 }
735 
736 static int
737 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
738 {
739     DPRINT("Function %s() not implemented yet.\n", __func__);
740     return 0;
741 }
742 
743 static int
744 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
745 {
746     DPRINT("Function %s() not implemented yet.\n", __func__);
747     return 0;
748 }
749 
750 static int
751 vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
752 {
753     unsigned int index = vmsg->payload.state.index;
754     unsigned int num = vmsg->payload.state.num;
755 
756     DPRINT("State.index: %d\n", index);
757     DPRINT("State.num:   %d\n", num);
758     dev->vq[index].size = num;
759     return 0;
760 }
761 
762 static int
763 vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
764 {
765     struct vhost_vring_addr *vra = &vmsg->payload.addr;
766     unsigned int index = vra->index;
767     VubrVirtq *vq = &dev->vq[index];
768 
769     DPRINT("vhost_vring_addr:\n");
770     DPRINT("    index:  %d\n", vra->index);
771     DPRINT("    flags:  %d\n", vra->flags);
772     DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
773     DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
774     DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
775     DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
776 
777     vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
778     vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
779     vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
780 
781     DPRINT("Setting virtq addresses:\n");
782     DPRINT("    vring_desc  at %p\n", vq->desc);
783     DPRINT("    vring_used  at %p\n", vq->used);
784     DPRINT("    vring_avail at %p\n", vq->avail);
785 
786     vq->last_used_index = vq->used->idx;
787     return 0;
788 }
789 
790 static int
791 vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
792 {
793     unsigned int index = vmsg->payload.state.index;
794     unsigned int num = vmsg->payload.state.num;
795 
796     DPRINT("State.index: %d\n", index);
797     DPRINT("State.num:   %d\n", num);
798     dev->vq[index].last_avail_index = num;
799 
800     return 0;
801 }
802 
803 static int
804 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
805 {
806     DPRINT("Function %s() not implemented yet.\n", __func__);
807     return 0;
808 }
809 
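/*
 * Store the kick eventfd for a virtqueue.  Only odd (transmit) queues are
 * registered with the dispatcher; the receive queue is driven by traffic
 * from the UDP backend instead of by guest kicks.
 */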
810 static int
811 vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
812 {
813     uint64_t u64_arg = vmsg->payload.u64;
814     int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
815 
816     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
817 
818     assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
819     assert(vmsg->fd_num == 1);
820 
821     dev->vq[index].kick_fd = vmsg->fds[0];
822     DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
823 
824     if (index % 2 == 1) {
825         /* TX queue. */
826         dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
827                        dev, vubr_kick_cb);
828 
829         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
830                dev->vq[index].kick_fd, index);
831     }
832     return 0;
833 }
834 
835 static int
836 vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
837 {
838     uint64_t u64_arg = vmsg->payload.u64;
839     int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
840 
841     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
842     assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
843     assert(vmsg->fd_num == 1);
844 
845     dev->vq[index].call_fd = vmsg->fds[0];
846     DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
847 
848     return 0;
849 }
850 
851 static int
852 vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
853 {
854     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
855     return 0;
856 }
857 
858 static int
859 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
860 {
861     /* FIXME: unimplemented */
862     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
863     return 0;
864 }
865 
866 static int
867 vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
868 {
869     /* FIXME: unimplemented */
870     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
871     return 0;
872 }
873 
874 static int
875 vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
876 {
877     DPRINT("Function %s() not implemented yet.\n", __func__);
878     return 0;
879 }
880 
881 static int
882 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
883 {
884     DPRINT("Function %s() not implemented yet.\n", __func__);
885     return 0;
886 }
887 
888 static int
889 vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
890 {
891     DPRINT("Function %s() not implemented yet.\n", __func__);
892     return 0;
893 }
894 
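/*
 * Dispatch a vhost-user request to its handler.  A handler returns 1 if a
 * reply carrying the (possibly updated) message should be sent back to
 * QEMU, and 0 otherwise.
 */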
895 static int
896 vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
897 {
898     /* Print out generic part of the request. */
899     DPRINT(
900            "==================   Vhost user message from QEMU   ==================\n");
901     DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
902            vmsg->request);
903     DPRINT("Flags:   0x%x\n", vmsg->flags);
904     DPRINT("Size:    %d\n", vmsg->size);
905 
906     if (vmsg->fd_num) {
907         int i;
908         DPRINT("Fds:");
909         for (i = 0; i < vmsg->fd_num; i++) {
910             DPRINT(" %d", vmsg->fds[i]);
911         }
912         DPRINT("\n");
913     }
914 
915     switch (vmsg->request) {
916     case VHOST_USER_NONE:
917         return vubr_none_exec(dev, vmsg);
918     case VHOST_USER_GET_FEATURES:
919         return vubr_get_features_exec(dev, vmsg);
920     case VHOST_USER_SET_FEATURES:
921         return vubr_set_features_exec(dev, vmsg);
922     case VHOST_USER_SET_OWNER:
923         return vubr_set_owner_exec(dev, vmsg);
924     case VHOST_USER_RESET_OWNER:
925         return vubr_reset_device_exec(dev, vmsg);
926     case VHOST_USER_SET_MEM_TABLE:
927         return vubr_set_mem_table_exec(dev, vmsg);
928     case VHOST_USER_SET_LOG_BASE:
929         return vubr_set_log_base_exec(dev, vmsg);
930     case VHOST_USER_SET_LOG_FD:
931         return vubr_set_log_fd_exec(dev, vmsg);
932     case VHOST_USER_SET_VRING_NUM:
933         return vubr_set_vring_num_exec(dev, vmsg);
934     case VHOST_USER_SET_VRING_ADDR:
935         return vubr_set_vring_addr_exec(dev, vmsg);
936     case VHOST_USER_SET_VRING_BASE:
937         return vubr_set_vring_base_exec(dev, vmsg);
938     case VHOST_USER_GET_VRING_BASE:
939         return vubr_get_vring_base_exec(dev, vmsg);
940     case VHOST_USER_SET_VRING_KICK:
941         return vubr_set_vring_kick_exec(dev, vmsg);
942     case VHOST_USER_SET_VRING_CALL:
943         return vubr_set_vring_call_exec(dev, vmsg);
944     case VHOST_USER_SET_VRING_ERR:
945         return vubr_set_vring_err_exec(dev, vmsg);
946     case VHOST_USER_GET_PROTOCOL_FEATURES:
947         return vubr_get_protocol_features_exec(dev, vmsg);
948     case VHOST_USER_SET_PROTOCOL_FEATURES:
949         return vubr_set_protocol_features_exec(dev, vmsg);
950     case VHOST_USER_GET_QUEUE_NUM:
951         return vubr_get_queue_num_exec(dev, vmsg);
952     case VHOST_USER_SET_VRING_ENABLE:
953         return vubr_set_vring_enable_exec(dev, vmsg);
954     case VHOST_USER_SEND_RARP:
955         return vubr_send_rarp_exec(dev, vmsg);
956 
957     case VHOST_USER_MAX:
958         assert(vmsg->request != VHOST_USER_MAX);
959     }
960     return 0;
961 }
962 
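/*
 * Called by the dispatcher when the vhost-user connection is readable:
 * read one message, execute it, and send a reply if the handler asked
 * for one.
 */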
963 static void
964 vubr_receive_cb(int sock, void *ctx)
965 {
966     VubrDev *dev = (VubrDev *) ctx;
967     VhostUserMsg vmsg;
968     int reply_requested;
969 
970     vubr_message_read(sock, &vmsg);
971     reply_requested = vubr_execute_request(dev, &vmsg);
972     if (reply_requested) {
973         /* Set the version in the flags when sending the reply */
974         vmsg.flags &= ~VHOST_USER_VERSION_MASK;
975         vmsg.flags |= VHOST_USER_VERSION;
976         vmsg.flags |= VHOST_USER_REPLY_MASK;
977         vubr_message_write(sock, &vmsg);
978     }
979 }
980 
981 static void
982 vubr_accept_cb(int sock, void *ctx)
983 {
984     VubrDev *dev = (VubrDev *)ctx;
985     int conn_fd;
986     struct sockaddr_un un;
987     socklen_t len = sizeof(un);
988 
989     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
990     if (conn_fd  == -1) {
991         vubr_die("accept()");
992     }
993     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
994     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
995 }
996 
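/*
 * Allocate a device, initialize its virtqueues, and start listening for a
 * vhost-user connection on the given UNIX socket path.
 */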
997 static VubrDev *
998 vubr_new(const char *path)
999 {
1000     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
1001     dev->nregions = 0;
1002     int i;
1003     struct sockaddr_un un;
1004     size_t len;
1005 
1006     for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
1007         dev->vq[i] = (VubrVirtq) {
1008             .call_fd = -1, .kick_fd = -1,
1009             .size = 0,
1010             .last_avail_index = 0, .last_used_index = 0,
1011             .desc = 0, .avail = 0, .used = 0,
1012         };
1013     }
1014 
1015     /* Get a UNIX socket. */
1016     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
1017     if (dev->sock == -1) {
1018         vubr_die("socket");
1019     }
1020 
1021     un.sun_family = AF_UNIX;
1022     strcpy(un.sun_path, path);
1023     len = sizeof(un.sun_family) + strlen(path);
1024     unlink(path);
1025 
1026     if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
1027         vubr_die("bind");
1028     }
1029 
1030     if (listen(dev->sock, 1) == -1) {
1031         vubr_die("listen");
1032     }
1033 
1034     dispatcher_init(&dev->dispatcher);
1035     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
1036                    vubr_accept_cb);
1037 
1038     DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
1039     return dev;
1040 }
1041 
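/*
 * Create the UDP backend socket: bind it to local_host:local_port for
 * receiving, remember dest_host:dest_port as the destination for sends,
 * and register the socket with the dispatcher.
 */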
1042 static void
1043 vubr_backend_udp_setup(VubrDev *dev,
1044                        const char *local_host,
1045                        uint16_t local_port,
1046                        const char *dest_host,
1047                        uint16_t dest_port)
1048 {
1049     int sock;
1050     struct sockaddr_in si_local = {
1051         .sin_family = AF_INET,
1052         .sin_port = htons(local_port),
1053     };
1054 
1055     if (inet_aton(local_host, &si_local.sin_addr) == 0) {
1056         fprintf(stderr, "inet_aton() failed.\n");
1057         exit(1);
1058     }
1059 
1060     /* Set up the destination for sends. */
1061     dev->backend_udp_dest = (struct sockaddr_in) {
1062         .sin_family = AF_INET,
1063         .sin_port = htons(dest_port),
1064     };
1065     if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
1066         fprintf(stderr, "inet_aton() failed.\n");
1067         exit(1);
1068     }
1069 
1070     sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
1071     if (sock == -1) {
1072         vubr_die("socket");
1073     }
1074 
1075     if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
1076         vubr_die("bind");
1077     }
1078 
1079     dev->backend_udp_sock = sock;
1080     dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
1081     DPRINT("Waiting for data from udp backend on %s:%d...\n",
1082            local_host, local_port);
1083 }
1084 
1085 static void
1086 vubr_run(VubrDev *dev)
1087 {
1088     while (1) {
1089         /* timeout 200ms */
1090         dispatcher_wait(&dev->dispatcher, 200000);
1091         /* Here one can try a polling strategy. */
1092     }
1093 }
1094 
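/*
 * All parameters are currently hard-coded (see the TODO list at the top
 * of the file): the vhost-user socket path and the local/remote UDP
 * endpoints of the backend.
 */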
1095 int
1096 main(int argc, char *argv[])
1097 {
1098     VubrDev *dev;
1099 
1100     dev = vubr_new("/tmp/vubr.sock");
1101     if (!dev) {
1102         return 1;
1103     }
1104 
1105     vubr_backend_udp_setup(dev,
1106                            "127.0.0.1", 4444,
1107                            "127.0.0.1", 5555);
1108     vubr_run(dev);
1109     return 0;
1110 }
1111