xref: /openbmc/qemu/tests/vhost-user-bridge.c (revision c39f95dc)
1 /*
2  * Vhost User Bridge
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * Authors:
7  *  Victor Kaplansky <victork@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 /*
14  * TODO:
15  *     - main should get parameters from the command line.
16  *     - implement all request handlers. Still not implemented:
17  *          vubr_get_queue_num_exec()
18  *          vubr_send_rarp_exec()
19  *     - test for broken requests and virtqueue.
20  *     - implement features defined by Virtio 1.0 spec.
21  *     - support mergeable buffers and indirect descriptors.
22  *     - implement clean shutdown.
23  *     - implement non-blocking writes to UDP backend.
24  *     - implement polling strategy.
25  *     - implement clean starting/stopping of vq processing
 *     - implement clean starting/stopping of dirty page logging for
 *       the used ring and for buffers.
28  */
29 
30 #define _FILE_OFFSET_BITS 64
31 
32 #include "qemu/osdep.h"
33 #include "qemu/iov.h"
34 #include "standard-headers/linux/virtio_net.h"
35 #include "contrib/libvhost-user/libvhost-user.h"
36 
/* Compile-time debug switch: a non-zero value enables DPRINT() output and
 * the hex dump of transmitted packets in vubr_handle_tx().
 */
#define VHOST_USER_BRIDGE_DEBUG 1

/* printf()-style trace message written to stdout.  The test is on a
 * compile-time constant, so the arguments are still type-checked when
 * debugging is switched off.
 */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
45 
/* Dispatcher callback: @sock is the file descriptor that became readable,
 * @ctx is the opaque pointer that was registered together with it.
 */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One registered handler: the callback plus the context handed to it. */
typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;
52 
/* Minimal select()-based event loop state. */
typedef struct Dispatcher {
    /* highest fd ever added (-1 when empty); never lowered on removal */
    int max_sock;
    /* set of fds currently watched for readability */
    fd_set fdset;
    /* handler for each fd, indexed by the fd value itself */
    Event events[FD_SETSIZE];
} Dispatcher;
58 
/* State of one bridge instance: a vhost-user device on one side and a UDP
 * socket on the other.
 */
typedef struct VubrDev {
    /* libvhost-user device; container_of() recovers the VubrDev from it */
    VuDev vudev;
    /* event loop that watches every fd used by this bridge */
    Dispatcher dispatcher;
    /* UDP socket used for both sending and receiving packets */
    int backend_udp_sock;
    /* address that sendmsg()/recvmsg() are given as msg_name */
    struct sockaddr_in backend_udp_dest;
    /* virtio-net header length, 10 or 12, chosen in vubr_set_features() */
    int hdrlen;
    /* UNIX socket created by vubr_new() (listening or connected) */
    int sock;
    /* not referenced by the code visible in this file */
    int ready;
    /* set by vubr_panic(); makes vubr_run() leave its loop */
    int quit;
} VubrDev;
69 
/* Report a fatal error and terminate.
 *
 * Prints @s followed by the description of the current errno value
 * (via perror()) and exits the process with status 1.  Never returns.
 */
static void
vubr_die(const char *s)
{
    perror(s);

    exit(1);
}
76 
77 static int
78 dispatcher_init(Dispatcher *dispr)
79 {
80     FD_ZERO(&dispr->fdset);
81     dispr->max_sock = -1;
82     return 0;
83 }
84 
85 static int
86 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
87 {
88     if (sock >= FD_SETSIZE) {
89         fprintf(stderr,
90                 "Error: Failed to add new event. sock %d should be less than %d\n",
91                 sock, FD_SETSIZE);
92         return -1;
93     }
94 
95     dispr->events[sock].ctx = ctx;
96     dispr->events[sock].callback = cb;
97 
98     FD_SET(sock, &dispr->fdset);
99     if (sock > dispr->max_sock) {
100         dispr->max_sock = sock;
101     }
102     DPRINT("Added sock %d for watching. max_sock: %d\n",
103            sock, dispr->max_sock);
104     return 0;
105 }
106 
107 static int
108 dispatcher_remove(Dispatcher *dispr, int sock)
109 {
110     if (sock >= FD_SETSIZE) {
111         fprintf(stderr,
112                 "Error: Failed to remove event. sock %d should be less than %d\n",
113                 sock, FD_SETSIZE);
114         return -1;
115     }
116 
117     FD_CLR(sock, &dispr->fdset);
118     DPRINT("Sock %d removed from dispatcher watch.\n", sock);
119     return 0;
120 }
121 
122 /* timeout in us */
123 static int
124 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
125 {
126     struct timeval tv;
127     tv.tv_sec = timeout / 1000000;
128     tv.tv_usec = timeout % 1000000;
129 
130     fd_set fdset = dispr->fdset;
131 
132     /* wait until some of sockets become readable. */
133     int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
134 
135     if (rc == -1) {
136         vubr_die("select");
137     }
138 
139     /* Timeout */
140     if (rc == 0) {
141         return 0;
142     }
143 
144     /* Now call callback for every ready socket. */
145 
146     int sock;
147     for (sock = 0; sock < dispr->max_sock + 1; sock++) {
148         /* The callback on a socket can remove other sockets from the
149          * dispatcher, thus we have to check that the socket is
150          * still not removed from dispatcher's list
151          */
152         if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
153             Event *e = &dispr->events[sock];
154             e->callback(sock, e->ctx);
155         }
156     }
157 
158     return 0;
159 }
160 
161 static void
162 vubr_handle_tx(VuDev *dev, int qidx)
163 {
164     VuVirtq *vq = vu_get_queue(dev, qidx);
165     VubrDev *vubr = container_of(dev, VubrDev, vudev);
166     int hdrlen = vubr->hdrlen;
167     VuVirtqElement *elem = NULL;
168 
169     assert(qidx % 2);
170 
171     for (;;) {
172         ssize_t ret;
173         unsigned int out_num;
174         struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
175 
176         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
177         if (!elem) {
178             break;
179         }
180 
181         out_num = elem->out_num;
182         out_sg = elem->out_sg;
183         if (out_num < 1) {
184             fprintf(stderr, "virtio-net header not in first element\n");
185             break;
186         }
187         if (VHOST_USER_BRIDGE_DEBUG) {
188             iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
189         }
190 
191         if (hdrlen) {
192             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
193                                        out_sg, out_num,
194                                        hdrlen, -1);
195             out_num = sg_num;
196             out_sg = sg;
197         }
198 
199         struct msghdr msg = {
200             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
201             .msg_namelen = sizeof(struct sockaddr_in),
202             .msg_iov = out_sg,
203             .msg_iovlen = out_num,
204         };
205         do {
206             ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
207         } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
208 
209         if (ret == -1) {
210             vubr_die("sendmsg()");
211         }
212 
213         vu_queue_push(dev, vq, elem, 0);
214         vu_queue_notify(dev, vq);
215 
216         free(elem);
217         elem = NULL;
218     }
219 
220     free(elem);
221 }
222 
223 
/* Undo the effect of iov_discard_front().
 *
 * @front: first element of the original (pre-discard) iovec array
 * @iov:   element the discard left as the new front
 * @bytes: number of bytes that were discarded
 *
 * Elements in [@front, @iov) are assumed to have been skipped whole, so
 * their lengths are subtracted from @bytes; whatever remains is the amount
 * by which @iov itself was advanced, and @iov is moved back by that much.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *vec = front;
    size_t remaining = bytes;

    while (vec != iov) {
        assert(remaining >= vec->iov_len);
        remaining -= vec->iov_len;
        vec++;
    }

    vec->iov_base = (char *)vec->iov_base - remaining;
    vec->iov_len += remaining;
}
241 
/* Shorten an iovec array so that it describes at most @bytes bytes.
 *
 * @iov:   first element of the array
 * @iovc:  number of elements
 * @bytes: number of bytes to keep; must not exceed the array's total length
 *
 * The element in which the limit falls has its iov_len reduced; elements
 * after it are left untouched.  When @bytes equals the total length there
 * is nothing to shorten and the array is returned unchanged.  Asking for
 * more than the array holds is a programming error and trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++, iov++) {
        if (bytes < iov->iov_len) {
            iov->iov_len = bytes;
            return;
        }

        bytes -= iov->iov_len;
    }

    /* bytes == 0 here means an exact fit, which is valid: it happens
     * whenever a received datagram fills the guest buffer completely.
     */
    assert(bytes == 0 && "couldn't truncate iov");
}
258 
259 static void
260 vubr_backend_recv_cb(int sock, void *ctx)
261 {
262     VubrDev *vubr = (VubrDev *) ctx;
263     VuDev *dev = &vubr->vudev;
264     VuVirtq *vq = vu_get_queue(dev, 0);
265     VuVirtqElement *elem = NULL;
266     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
267     struct virtio_net_hdr_mrg_rxbuf mhdr;
268     unsigned mhdr_cnt = 0;
269     int hdrlen = vubr->hdrlen;
270     int i = 0;
271     struct virtio_net_hdr hdr = {
272         .flags = 0,
273         .gso_type = VIRTIO_NET_HDR_GSO_NONE
274     };
275 
276     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
277     DPRINT("    hdrlen = %d\n", hdrlen);
278 
279     if (!vu_queue_enabled(dev, vq) ||
280         !vu_queue_started(dev, vq) ||
281         !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
282         DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
283         return;
284     }
285 
286     do {
287         struct iovec *sg;
288         ssize_t ret, total = 0;
289         unsigned int num;
290 
291         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
292         if (!elem) {
293             break;
294         }
295 
296         if (elem->in_num < 1) {
297             fprintf(stderr, "virtio-net contains no in buffers\n");
298             break;
299         }
300 
301         sg = elem->in_sg;
302         num = elem->in_num;
303         if (i == 0) {
304             if (hdrlen == 12) {
305                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
306                                     sg, elem->in_num,
307                                     offsetof(typeof(mhdr), num_buffers),
308                                     sizeof(mhdr.num_buffers));
309             }
310             iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
311             total += hdrlen;
312             ret = iov_discard_front(&sg, &num, hdrlen);
313             assert(ret == hdrlen);
314         }
315 
316         struct msghdr msg = {
317             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
318             .msg_namelen = sizeof(struct sockaddr_in),
319             .msg_iov = sg,
320             .msg_iovlen = elem->in_num,
321             .msg_flags = MSG_DONTWAIT,
322         };
323         do {
324             ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
325         } while (ret == -1 && (errno == EINTR));
326 
327         if (i == 0) {
328             iov_restore_front(elem->in_sg, sg, hdrlen);
329         }
330 
331         if (ret == -1) {
332             if (errno == EWOULDBLOCK) {
333                 vu_queue_rewind(dev, vq, 1);
334                 break;
335             }
336 
337             vubr_die("recvmsg()");
338         }
339 
340         total += ret;
341         iov_truncate(elem->in_sg, elem->in_num, total);
342         vu_queue_fill(dev, vq, elem, total, i++);
343 
344         free(elem);
345         elem = NULL;
346     } while (false); /* could loop if DONTWAIT worked? */
347 
348     if (mhdr_cnt) {
349         mhdr.num_buffers = i;
350         iov_from_buf(mhdr_sg, mhdr_cnt,
351                      0,
352                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
353     }
354 
355     vu_queue_flush(dev, vq, i);
356     vu_queue_notify(dev, vq);
357 
358     free(elem);
359 }
360 
361 static void
362 vubr_receive_cb(int sock, void *ctx)
363 {
364     VubrDev *vubr = (VubrDev *)ctx;
365 
366     if (!vu_dispatch(&vubr->vudev)) {
367         fprintf(stderr, "Error while dispatching\n");
368     }
369 }
370 
/* Arguments saved by vubr_set_watch() so that watch_cb() can forward a
 * dispatcher event to the libvhost-user callback.
 */
typedef struct WatchData {
    VuDev *dev;         /* device passed as first argument to cb */
    vu_watch_cb cb;     /* libvhost-user callback to invoke */
    void *data;         /* opaque pointer passed as last argument to cb */
} WatchData;
376 
377 static void
378 watch_cb(int sock, void *ctx)
379 {
380     struct WatchData *wd = ctx;
381 
382     wd->cb(wd->dev, VU_WATCH_IN, wd->data);
383 }
384 
385 static void
386 vubr_set_watch(VuDev *dev, int fd, int condition,
387                vu_watch_cb cb, void *data)
388 {
389     VubrDev *vubr = container_of(dev, VubrDev, vudev);
390     static WatchData watches[FD_SETSIZE];
391     struct WatchData *wd = &watches[fd];
392 
393     wd->cb = cb;
394     wd->data = data;
395     wd->dev = dev;
396     dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
397 }
398 
399 static void
400 vubr_remove_watch(VuDev *dev, int fd)
401 {
402     VubrDev *vubr = container_of(dev, VubrDev, vudev);
403 
404     dispatcher_remove(&vubr->dispatcher, fd);
405 }
406 
407 static int
408 vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
409 {
410     DPRINT("Function %s() not implemented yet.\n", __func__);
411     return 0;
412 }
413 
414 static int
415 vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
416 {
417     switch (vmsg->request) {
418     case VHOST_USER_SEND_RARP:
419         *do_reply = vubr_send_rarp_exec(dev, vmsg);
420         return 1;
421     default:
422         /* let the library handle the rest */
423         return 0;
424     }
425 
426     return 0;
427 }
428 
429 static void
430 vubr_set_features(VuDev *dev, uint64_t features)
431 {
432     VubrDev *vubr = container_of(dev, VubrDev, vudev);
433 
434     if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
435         (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
436         vubr->hdrlen = 12;
437     } else {
438         vubr->hdrlen = 10;
439     }
440 }
441 
442 static uint64_t
443 vubr_get_features(VuDev *dev)
444 {
445     return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
446         1ULL << VIRTIO_NET_F_MRG_RXBUF;
447 }
448 
449 static void
450 vubr_queue_set_started(VuDev *dev, int qidx, bool started)
451 {
452     VuVirtq *vq = vu_get_queue(dev, qidx);
453 
454     if (qidx % 2 == 1) {
455         vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
456     }
457 }
458 
459 static void
460 vubr_panic(VuDev *dev, const char *msg)
461 {
462     VubrDev *vubr = container_of(dev, VubrDev, vudev);
463 
464     fprintf(stderr, "PANIC: %s\n", msg);
465 
466     dispatcher_remove(&vubr->dispatcher, dev->sock);
467     vubr->quit = 1;
468 }
469 
470 static bool
471 vubr_queue_is_processed_in_order(VuDev *dev, int qidx)
472 {
473     return true;
474 }
475 
/* Device callbacks handed to vu_init() for both the server (accept) and
 * client (connect) paths.
 */
static const VuDevIface vuiface = {
    .get_features = vubr_get_features,
    .set_features = vubr_set_features,
    .process_msg = vubr_process_msg,
    .queue_set_started = vubr_queue_set_started,
    .queue_is_processed_in_order = vubr_queue_is_processed_in_order,
};
483 
484 static void
485 vubr_accept_cb(int sock, void *ctx)
486 {
487     VubrDev *dev = (VubrDev *)ctx;
488     int conn_fd;
489     struct sockaddr_un un;
490     socklen_t len = sizeof(un);
491 
492     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
493     if (conn_fd == -1) {
494         vubr_die("accept()");
495     }
496     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
497 
498     vu_init(&dev->vudev,
499             conn_fd,
500             vubr_panic,
501             vubr_set_watch,
502             vubr_remove_watch,
503             &vuiface);
504 
505     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
506     dispatcher_remove(&dev->dispatcher, sock);
507 }
508 
509 static VubrDev *
510 vubr_new(const char *path, bool client)
511 {
512     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
513     struct sockaddr_un un;
514     CallbackFunc cb;
515     size_t len;
516 
517     /* Get a UNIX socket. */
518     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
519     if (dev->sock == -1) {
520         vubr_die("socket");
521     }
522 
523     un.sun_family = AF_UNIX;
524     strcpy(un.sun_path, path);
525     len = sizeof(un.sun_family) + strlen(path);
526 
527     if (!client) {
528         unlink(path);
529 
530         if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
531             vubr_die("bind");
532         }
533 
534         if (listen(dev->sock, 1) == -1) {
535             vubr_die("listen");
536         }
537         cb = vubr_accept_cb;
538 
539         DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
540     } else {
541         if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
542             vubr_die("connect");
543         }
544         vu_init(&dev->vudev,
545                 dev->sock,
546                 vubr_panic,
547                 vubr_set_watch,
548                 vubr_remove_watch,
549                 &vuiface);
550         cb = vubr_receive_cb;
551     }
552 
553     dispatcher_init(&dev->dispatcher);
554 
555     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
556 
557     return dev;
558 }
559 
/* Fill in the IPv4 address of @saddr from @host.
 *
 * @saddr: socket address whose sin_addr is written
 * @host:  numeric IPv4 address if it starts with a digit, otherwise a
 *         host name to resolve
 *
 * Any failure (malformed address, unknown host, host without an IPv4
 * address) prints a message to stderr and exits with status 1.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions require a value representable as unsigned char */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Only copy what is known to be an IPv4 address of the right size */
        if (he->h_addrtype != AF_INET ||
            (size_t)he->h_length != sizeof(saddr->sin_addr) ||
            !he->h_addr_list[0]) {
            fprintf(stderr, "gethostbyname() returned no IPv4 address.\n");
            exit(1);
        }
        /* h_addr_list entries are plain char buffers; memcpy() avoids an
         * aliasing/alignment-unsafe pointer cast.
         */
        memcpy(&saddr->sin_addr, he->h_addr_list[0],
               sizeof(saddr->sin_addr));
    }
}
578 
579 static void
580 vubr_backend_udp_setup(VubrDev *dev,
581                        const char *local_host,
582                        const char *local_port,
583                        const char *remote_host,
584                        const char *remote_port)
585 {
586     int sock;
587     const char *r;
588 
589     int lport, rport;
590 
591     lport = strtol(local_port, (char **)&r, 0);
592     if (r == local_port) {
593         fprintf(stderr, "lport parsing failed.\n");
594         exit(1);
595     }
596 
597     rport = strtol(remote_port, (char **)&r, 0);
598     if (r == remote_port) {
599         fprintf(stderr, "rport parsing failed.\n");
600         exit(1);
601     }
602 
603     struct sockaddr_in si_local = {
604         .sin_family = AF_INET,
605         .sin_port = htons(lport),
606     };
607 
608     vubr_set_host(&si_local, local_host);
609 
610     /* setup destination for sends */
611     dev->backend_udp_dest = (struct sockaddr_in) {
612         .sin_family = AF_INET,
613         .sin_port = htons(rport),
614     };
615     vubr_set_host(&dev->backend_udp_dest, remote_host);
616 
617     sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
618     if (sock == -1) {
619         vubr_die("socket");
620     }
621 
622     if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
623         vubr_die("bind");
624     }
625 
626     dev->backend_udp_sock = sock;
627     dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
628     DPRINT("Waiting for data from udp backend on %s:%d...\n",
629            local_host, lport);
630 }
631 
632 static void
633 vubr_run(VubrDev *dev)
634 {
635     while (!dev->quit) {
636         /* timeout 200ms */
637         dispatcher_wait(&dev->dispatcher, 200000);
638         /* Here one can try polling strategy. */
639     }
640 }
641 
/* Split a "host:port" string at its first ':'.
 *
 * @host: receives a newly allocated copy of the text before the ':'
 * @port: receives a newly allocated copy of the text after the ':'
 * @buf:  input string; it is only read, never modified
 *
 * Returns 0 on success.  Returns -1, leaving @host and @port untouched,
 * when @buf contains no ':' or memory cannot be allocated.  The caller
 * owns both returned strings.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    /* Copy the two halves out instead of cutting the input in place. */
    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, sep + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
655 
/* Built-in defaults, used when the matching command line option is absent
 * and echoed in the usage text printed by main().
 */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() overrides them from -u, -l and -r. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
667 
668 int
669 main(int argc, char *argv[])
670 {
671     VubrDev *dev;
672     int opt;
673     bool client = false;
674 
675     while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
676 
677         switch (opt) {
678         case 'l':
679             if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
680                 goto out;
681             }
682             break;
683         case 'r':
684             if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
685                 goto out;
686             }
687             break;
688         case 'u':
689             ud_socket_path = strdup(optarg);
690             break;
691         case 'c':
692             client = true;
693             break;
694         default:
695             goto out;
696         }
697     }
698 
699     DPRINT("ud socket: %s (%s)\n", ud_socket_path,
700            client ? "client" : "server");
701     DPRINT("local:     %s:%s\n", lhost, lport);
702     DPRINT("remote:    %s:%s\n", rhost, rport);
703 
704     dev = vubr_new(ud_socket_path, client);
705     if (!dev) {
706         return 1;
707     }
708 
709     vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
710     vubr_run(dev);
711 
712     vu_deinit(&dev->vudev);
713 
714     return 0;
715 
716 out:
717     fprintf(stderr, "Usage: %s ", argv[0]);
718     fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
719     fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
720             DEFAULT_UD_SOCKET);
721     fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
722             DEFAULT_LHOST, DEFAULT_LPORT);
723     fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
724             DEFAULT_RHOST, DEFAULT_RPORT);
725     fprintf(stderr, "\t-c client mode\n");
726 
727     return 1;
728 }
729