xref: /openbmc/qemu/tests/vhost-user-bridge.c (revision 80adf54e)
1 /*
2  * Vhost User Bridge
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * Authors:
7  *  Victor Kaplansky <victork@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 /*
14  * TODO:
15  *     - main should get parameters from the command line.
16  *     - implement all request handlers. Still not implemented:
17  *          vubr_get_queue_num_exec()
18  *          vubr_send_rarp_exec()
19  *     - test for broken requests and virtqueue.
20  *     - implement features defined by Virtio 1.0 spec.
21  *     - support mergeable buffers and indirect descriptors.
22  *     - implement clean shutdown.
23  *     - implement non-blocking writes to UDP backend.
24  *     - implement polling strategy.
25  *     - implement clean starting/stopping of vq processing
26  *     - implement clean starting/stopping of dirty page logging for the
27  *       used ring and buffers.
28  */
29 
30 #define _FILE_OFFSET_BITS 64
31 
32 #include "qemu/osdep.h"
33 #include "qemu/iov.h"
34 #include "standard-headers/linux/virtio_net.h"
35 #include "contrib/libvhost-user/libvhost-user.h"
36 
/* Compile-time switch for the tracing done through DPRINT(); 0 disables it. */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf()-style trace helper. The arguments are only evaluated and
 * printed when VHOST_USER_BRIDGE_DEBUG is non-zero.
 */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
45 
/* Handler invoked by the dispatcher when @sock becomes readable. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One dispatcher registration: a callback and the context handed to it. */
typedef struct Event {
    void *ctx;              /* opaque pointer passed back to callback */
    CallbackFunc callback;  /* called as callback(sock, ctx) */
} Event;
52 
53 typedef struct Dispatcher {
54     int max_sock;
55     fd_set fdset;
56     Event events[FD_SETSIZE];
57 } Dispatcher;
58 
59 typedef struct VubrDev {
60     VuDev vudev;
61     Dispatcher dispatcher;
62     int backend_udp_sock;
63     struct sockaddr_in backend_udp_dest;
64     int hdrlen;
65     int sock;
66     int ready;
67     int quit;
68 } VubrDev;
69 
/*
 * Print @s followed by the description of the current errno to stderr
 * and terminate the process with exit status 1.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
76 
77 static int
78 dispatcher_init(Dispatcher *dispr)
79 {
80     FD_ZERO(&dispr->fdset);
81     dispr->max_sock = -1;
82     return 0;
83 }
84 
85 static int
86 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
87 {
88     if (sock >= FD_SETSIZE) {
89         fprintf(stderr,
90                 "Error: Failed to add new event. sock %d should be less than %d\n",
91                 sock, FD_SETSIZE);
92         return -1;
93     }
94 
95     dispr->events[sock].ctx = ctx;
96     dispr->events[sock].callback = cb;
97 
98     FD_SET(sock, &dispr->fdset);
99     if (sock > dispr->max_sock) {
100         dispr->max_sock = sock;
101     }
102     DPRINT("Added sock %d for watching. max_sock: %d\n",
103            sock, dispr->max_sock);
104     return 0;
105 }
106 
107 static int
108 dispatcher_remove(Dispatcher *dispr, int sock)
109 {
110     if (sock >= FD_SETSIZE) {
111         fprintf(stderr,
112                 "Error: Failed to remove event. sock %d should be less than %d\n",
113                 sock, FD_SETSIZE);
114         return -1;
115     }
116 
117     FD_CLR(sock, &dispr->fdset);
118     DPRINT("Sock %d removed from dispatcher watch.\n", sock);
119     return 0;
120 }
121 
122 /* timeout in us */
123 static int
124 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
125 {
126     struct timeval tv;
127     tv.tv_sec = timeout / 1000000;
128     tv.tv_usec = timeout % 1000000;
129 
130     fd_set fdset = dispr->fdset;
131 
132     /* wait until some of sockets become readable. */
133     int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
134 
135     if (rc == -1) {
136         vubr_die("select");
137     }
138 
139     /* Timeout */
140     if (rc == 0) {
141         return 0;
142     }
143 
144     /* Now call callback for every ready socket. */
145 
146     int sock;
147     for (sock = 0; sock < dispr->max_sock + 1; sock++) {
148         /* The callback on a socket can remove other sockets from the
149          * dispatcher, thus we have to check that the socket is
150          * still not removed from dispatcher's list
151          */
152         if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
153             Event *e = &dispr->events[sock];
154             e->callback(sock, e->ctx);
155         }
156     }
157 
158     return 0;
159 }
160 
161 static void
162 vubr_handle_tx(VuDev *dev, int qidx)
163 {
164     VuVirtq *vq = vu_get_queue(dev, qidx);
165     VubrDev *vubr = container_of(dev, VubrDev, vudev);
166     int hdrlen = vubr->hdrlen;
167     VuVirtqElement *elem = NULL;
168 
169     assert(qidx % 2);
170 
171     for (;;) {
172         ssize_t ret;
173         unsigned int out_num;
174         struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
175 
176         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
177         if (!elem) {
178             break;
179         }
180 
181         out_num = elem->out_num;
182         out_sg = elem->out_sg;
183         if (out_num < 1) {
184             fprintf(stderr, "virtio-net header not in first element\n");
185             break;
186         }
187         if (VHOST_USER_BRIDGE_DEBUG) {
188             iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
189         }
190 
191         if (hdrlen) {
192             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
193                                        out_sg, out_num,
194                                        hdrlen, -1);
195             out_num = sg_num;
196             out_sg = sg;
197         }
198 
199         struct msghdr msg = {
200             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
201             .msg_namelen = sizeof(struct sockaddr_in),
202             .msg_iov = out_sg,
203             .msg_iovlen = out_num,
204         };
205         do {
206             ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
207         } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
208 
209         if (ret == -1) {
210             vubr_die("sendmsg()");
211         }
212 
213         vu_queue_push(dev, vq, elem, 0);
214         vu_queue_notify(dev, vq);
215 
216         free(elem);
217         elem = NULL;
218     }
219 
220     free(elem);
221 }
222 
223 
/*
 * Reverse the effect of iov_discard_front().
 *
 * @front: the iovec array as it was passed to iov_discard_front()
 * @iov:   the advanced pointer that iov_discard_front() produced
 * @bytes: the number of bytes that were discarded
 *
 * Entries in [front, iov) were skipped whole and kept their length, so
 * only the part of @bytes left after subtracting them was shaved off
 * *@iov and has to be given back.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *cur;

    for (cur = front; cur != iov; cur++) {
        assert(bytes >= cur->iov_len);
        bytes -= cur->iov_len;
    }

    /*
     * Nothing was cut from *cur. It may even be one past the end of the
     * array when every entry was consumed, so do not touch it.
     */
    if (bytes == 0) {
        return;
    }

    /* Arithmetic on void * is a GNU extension; go through char *. */
    cur->iov_base = (char *)cur->iov_base - bytes;
    cur->iov_len += bytes;
}
241 
/*
 * Shorten the entry of @iov (holding @iovc entries) in which the first
 * @bytes bytes end, so that it stops exactly there. Entries after that
 * one are left unmodified.
 *
 * @bytes equal to the total length of the vector is an exact fit and
 * needs no change. Asking for more than the vector holds is a caller
 * bug and trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++, iov++) {
        if (bytes < iov->iov_len) {
            iov->iov_len = bytes;
            return;
        }

        bytes -= iov->iov_len;
    }

    /* bytes == 0 here means the data filled the vector completely. */
    assert(bytes == 0 && "couldn't truncate iov");
}
258 
259 static void
260 vubr_backend_recv_cb(int sock, void *ctx)
261 {
262     VubrDev *vubr = (VubrDev *) ctx;
263     VuDev *dev = &vubr->vudev;
264     VuVirtq *vq = vu_get_queue(dev, 0);
265     VuVirtqElement *elem = NULL;
266     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
267     struct virtio_net_hdr_mrg_rxbuf mhdr;
268     unsigned mhdr_cnt = 0;
269     int hdrlen = vubr->hdrlen;
270     int i = 0;
271     struct virtio_net_hdr hdr = {
272         .flags = 0,
273         .gso_type = VIRTIO_NET_HDR_GSO_NONE
274     };
275 
276     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
277     DPRINT("    hdrlen = %d\n", hdrlen);
278 
279     if (!vu_queue_enabled(dev, vq) ||
280         !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
281         DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
282         return;
283     }
284 
285     do {
286         struct iovec *sg;
287         ssize_t ret, total = 0;
288         unsigned int num;
289 
290         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
291         if (!elem) {
292             break;
293         }
294 
295         if (elem->in_num < 1) {
296             fprintf(stderr, "virtio-net contains no in buffers\n");
297             break;
298         }
299 
300         sg = elem->in_sg;
301         num = elem->in_num;
302         if (i == 0) {
303             if (hdrlen == 12) {
304                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
305                                     sg, elem->in_num,
306                                     offsetof(typeof(mhdr), num_buffers),
307                                     sizeof(mhdr.num_buffers));
308             }
309             iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
310             total += hdrlen;
311             ret = iov_discard_front(&sg, &num, hdrlen);
312             assert(ret == hdrlen);
313         }
314 
315         struct msghdr msg = {
316             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
317             .msg_namelen = sizeof(struct sockaddr_in),
318             .msg_iov = sg,
319             .msg_iovlen = elem->in_num,
320             .msg_flags = MSG_DONTWAIT,
321         };
322         do {
323             ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
324         } while (ret == -1 && (errno == EINTR));
325 
326         if (i == 0) {
327             iov_restore_front(elem->in_sg, sg, hdrlen);
328         }
329 
330         if (ret == -1) {
331             if (errno == EWOULDBLOCK) {
332                 vu_queue_rewind(dev, vq, 1);
333                 break;
334             }
335 
336             vubr_die("recvmsg()");
337         }
338 
339         total += ret;
340         iov_truncate(elem->in_sg, elem->in_num, total);
341         vu_queue_fill(dev, vq, elem, total, i++);
342 
343         free(elem);
344         elem = NULL;
345     } while (false); /* could loop if DONTWAIT worked? */
346 
347     if (mhdr_cnt) {
348         mhdr.num_buffers = i;
349         iov_from_buf(mhdr_sg, mhdr_cnt,
350                      0,
351                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
352     }
353 
354     vu_queue_flush(dev, vq, i);
355     vu_queue_notify(dev, vq);
356 
357     free(elem);
358 }
359 
360 static void
361 vubr_receive_cb(int sock, void *ctx)
362 {
363     VubrDev *vubr = (VubrDev *)ctx;
364 
365     if (!vu_dispatch(&vubr->vudev)) {
366         fprintf(stderr, "Error while dispatching\n");
367     }
368 }
369 
370 typedef struct WatchData {
371     VuDev *dev;
372     vu_watch_cb cb;
373     void *data;
374 } WatchData;
375 
376 static void
377 watch_cb(int sock, void *ctx)
378 {
379     struct WatchData *wd = ctx;
380 
381     wd->cb(wd->dev, VU_WATCH_IN, wd->data);
382 }
383 
384 static void
385 vubr_set_watch(VuDev *dev, int fd, int condition,
386                vu_watch_cb cb, void *data)
387 {
388     VubrDev *vubr = container_of(dev, VubrDev, vudev);
389     static WatchData watches[FD_SETSIZE];
390     struct WatchData *wd = &watches[fd];
391 
392     wd->cb = cb;
393     wd->data = data;
394     wd->dev = dev;
395     dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
396 }
397 
398 static void
399 vubr_remove_watch(VuDev *dev, int fd)
400 {
401     VubrDev *vubr = container_of(dev, VubrDev, vudev);
402 
403     dispatcher_remove(&vubr->dispatcher, fd);
404 }
405 
406 static int
407 vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
408 {
409     DPRINT("Function %s() not implemented yet.\n", __func__);
410     return 0;
411 }
412 
413 static int
414 vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
415 {
416     switch (vmsg->request) {
417     case VHOST_USER_SEND_RARP:
418         *do_reply = vubr_send_rarp_exec(dev, vmsg);
419         return 1;
420     default:
421         /* let the library handle the rest */
422         return 0;
423     }
424 
425     return 0;
426 }
427 
428 static void
429 vubr_set_features(VuDev *dev, uint64_t features)
430 {
431     VubrDev *vubr = container_of(dev, VubrDev, vudev);
432 
433     if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
434         (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
435         vubr->hdrlen = 12;
436     } else {
437         vubr->hdrlen = 10;
438     }
439 }
440 
441 static uint64_t
442 vubr_get_features(VuDev *dev)
443 {
444     return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
445         1ULL << VIRTIO_NET_F_MRG_RXBUF;
446 }
447 
448 static void
449 vubr_queue_set_started(VuDev *dev, int qidx, bool started)
450 {
451     VuVirtq *vq = vu_get_queue(dev, qidx);
452 
453     if (qidx % 2 == 1) {
454         vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
455     }
456 }
457 
458 static void
459 vubr_panic(VuDev *dev, const char *msg)
460 {
461     VubrDev *vubr = container_of(dev, VubrDev, vudev);
462 
463     fprintf(stderr, "PANIC: %s\n", msg);
464 
465     dispatcher_remove(&vubr->dispatcher, dev->sock);
466     vubr->quit = 1;
467 }
468 
469 static const VuDevIface vuiface = {
470     .get_features = vubr_get_features,
471     .set_features = vubr_set_features,
472     .process_msg = vubr_process_msg,
473     .queue_set_started = vubr_queue_set_started,
474 };
475 
476 static void
477 vubr_accept_cb(int sock, void *ctx)
478 {
479     VubrDev *dev = (VubrDev *)ctx;
480     int conn_fd;
481     struct sockaddr_un un;
482     socklen_t len = sizeof(un);
483 
484     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
485     if (conn_fd == -1) {
486         vubr_die("accept()");
487     }
488     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
489 
490     vu_init(&dev->vudev,
491             conn_fd,
492             vubr_panic,
493             vubr_set_watch,
494             vubr_remove_watch,
495             &vuiface);
496 
497     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
498     dispatcher_remove(&dev->dispatcher, sock);
499 }
500 
501 static VubrDev *
502 vubr_new(const char *path, bool client)
503 {
504     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
505     struct sockaddr_un un;
506     CallbackFunc cb;
507     size_t len;
508 
509     /* Get a UNIX socket. */
510     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
511     if (dev->sock == -1) {
512         vubr_die("socket");
513     }
514 
515     un.sun_family = AF_UNIX;
516     strcpy(un.sun_path, path);
517     len = sizeof(un.sun_family) + strlen(path);
518 
519     if (!client) {
520         unlink(path);
521 
522         if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
523             vubr_die("bind");
524         }
525 
526         if (listen(dev->sock, 1) == -1) {
527             vubr_die("listen");
528         }
529         cb = vubr_accept_cb;
530 
531         DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
532     } else {
533         if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
534             vubr_die("connect");
535         }
536         vu_init(&dev->vudev,
537                 dev->sock,
538                 vubr_panic,
539                 vubr_set_watch,
540                 vubr_remove_watch,
541                 &vuiface);
542         cb = vubr_receive_cb;
543     }
544 
545     dispatcher_init(&dev->dispatcher);
546 
547     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
548 
549     return dev;
550 }
551 
/*
 * Fill saddr->sin_addr from @host.
 *
 * A @host starting with a digit is parsed with inet_aton(); anything
 * else is resolved with gethostbyname() and the first IPv4 address is
 * used. Any failure prints a message and exits with status 1.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions are undefined for negative char values. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Only copy what is really an IPv4 address of the right size. */
        if (he->h_addrtype != AF_INET ||
            (size_t)he->h_length != sizeof(saddr->sin_addr) ||
            !he->h_addr_list[0]) {
            fprintf(stderr, "gethostbyname() returned no IPv4 address.\n");
            exit(1);
        }
        memcpy(&saddr->sin_addr, he->h_addr_list[0], sizeof(saddr->sin_addr));
    }
}
570 
571 static void
572 vubr_backend_udp_setup(VubrDev *dev,
573                        const char *local_host,
574                        const char *local_port,
575                        const char *remote_host,
576                        const char *remote_port)
577 {
578     int sock;
579     const char *r;
580 
581     int lport, rport;
582 
583     lport = strtol(local_port, (char **)&r, 0);
584     if (r == local_port) {
585         fprintf(stderr, "lport parsing failed.\n");
586         exit(1);
587     }
588 
589     rport = strtol(remote_port, (char **)&r, 0);
590     if (r == remote_port) {
591         fprintf(stderr, "rport parsing failed.\n");
592         exit(1);
593     }
594 
595     struct sockaddr_in si_local = {
596         .sin_family = AF_INET,
597         .sin_port = htons(lport),
598     };
599 
600     vubr_set_host(&si_local, local_host);
601 
602     /* setup destination for sends */
603     dev->backend_udp_dest = (struct sockaddr_in) {
604         .sin_family = AF_INET,
605         .sin_port = htons(rport),
606     };
607     vubr_set_host(&dev->backend_udp_dest, remote_host);
608 
609     sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
610     if (sock == -1) {
611         vubr_die("socket");
612     }
613 
614     if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
615         vubr_die("bind");
616     }
617 
618     dev->backend_udp_sock = sock;
619     dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
620     DPRINT("Waiting for data from udp backend on %s:%d...\n",
621            local_host, lport);
622 }
623 
624 static void
625 vubr_run(VubrDev *dev)
626 {
627     while (!dev->quit) {
628         /* timeout 200ms */
629         dispatcher_wait(&dev->dispatcher, 200000);
630         /* Here one can try polling strategy. */
631     }
632 }
633 
/*
 * Split @buf of the form "host:port" at its first ':'.
 *
 * On success *@host and *@port point to newly allocated copies of the
 * two parts (owned by the caller) and 0 is returned. When @buf has no
 * ':' or memory cannot be allocated, -1 is returned and the outputs are
 * left untouched. @buf itself is never modified.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, sep + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
647 
/* Built-in endpoints, used unless overridden on the command line. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() replaces them for -u, -l and -r. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
659 
660 int
661 main(int argc, char *argv[])
662 {
663     VubrDev *dev;
664     int opt;
665     bool client = false;
666 
667     while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
668 
669         switch (opt) {
670         case 'l':
671             if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
672                 goto out;
673             }
674             break;
675         case 'r':
676             if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
677                 goto out;
678             }
679             break;
680         case 'u':
681             ud_socket_path = strdup(optarg);
682             break;
683         case 'c':
684             client = true;
685             break;
686         default:
687             goto out;
688         }
689     }
690 
691     DPRINT("ud socket: %s (%s)\n", ud_socket_path,
692            client ? "client" : "server");
693     DPRINT("local:     %s:%s\n", lhost, lport);
694     DPRINT("remote:    %s:%s\n", rhost, rport);
695 
696     dev = vubr_new(ud_socket_path, client);
697     if (!dev) {
698         return 1;
699     }
700 
701     vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
702     vubr_run(dev);
703 
704     vu_deinit(&dev->vudev);
705 
706     return 0;
707 
708 out:
709     fprintf(stderr, "Usage: %s ", argv[0]);
710     fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
711     fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
712             DEFAULT_UD_SOCKET);
713     fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
714             DEFAULT_LHOST, DEFAULT_LPORT);
715     fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
716             DEFAULT_RHOST, DEFAULT_RPORT);
717     fprintf(stderr, "\t-c client mode\n");
718 
719     return 1;
720 }
721