xref: /openbmc/qemu/tests/vhost-user-bridge.c (revision 9aa3397f)
1 /*
2  * Vhost User Bridge
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * Authors:
7  *  Victor Kaplansky <victork@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or
10  * later.  See the COPYING file in the top-level directory.
11  */
12 
13 /*
14  * TODO:
15  *     - main should get parameters from the command line.
16  *     - implement all request handlers. Still not implemented:
17  *          vubr_get_queue_num_exec()
18  *          vubr_send_rarp_exec()
19  *     - test for broken requests and virtqueue.
20  *     - implement features defined by Virtio 1.0 spec.
21  *     - support mergeable buffers and indirect descriptors.
22  *     - implement clean shutdown.
23  *     - implement non-blocking writes to UDP backend.
24  *     - implement polling strategy.
25  *     - implement clean starting/stopping of vq processing
26  *     - implement clean starting/stopping of used and buffers
27  *       dirty page logging.
28  */
29 
30 #define _FILE_OFFSET_BITS 64
31 
32 #include "qemu/osdep.h"
33 #include "qemu/iov.h"
34 #include "standard-headers/linux/virtio_net.h"
35 #include "contrib/libvhost-user/libvhost-user.h"
36 
/* Set to 0 to silence the traces emitted through DPRINT(). */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf-style debug trace.  The switch is tested with a plain `if`
 * instead of #ifdef, so the arguments are always compiled and
 * format-checked even when tracing is turned off.
 */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
45 
/* Handler the dispatcher invokes for a ready descriptor 'sock'. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* A registered handler together with the opaque pointer passed to it. */
typedef struct Event {
    void *ctx;              /* handed back to 'callback' unchanged */
    CallbackFunc callback;  /* function to run when the socket is ready */
} Event;
52 
53 typedef struct Dispatcher {
54     int max_sock;
55     fd_set fdset;
56     Event events[FD_SETSIZE];
57 } Dispatcher;
58 
59 typedef struct VubrDev {
60     VuDev vudev;
61     Dispatcher dispatcher;
62     int backend_udp_sock;
63     struct sockaddr_in backend_udp_dest;
64     int hdrlen;
65     int sock;
66     int ready;
67     int quit;
68 } VubrDev;
69 
/*
 * Fatal error helper: print 's' followed by the description of the
 * current errno on stderr, then terminate the process with status 1.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
76 
77 static int
78 dispatcher_init(Dispatcher *dispr)
79 {
80     FD_ZERO(&dispr->fdset);
81     dispr->max_sock = -1;
82     return 0;
83 }
84 
85 static int
86 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
87 {
88     if (sock >= FD_SETSIZE) {
89         fprintf(stderr,
90                 "Error: Failed to add new event. sock %d should be less than %d\n",
91                 sock, FD_SETSIZE);
92         return -1;
93     }
94 
95     dispr->events[sock].ctx = ctx;
96     dispr->events[sock].callback = cb;
97 
98     FD_SET(sock, &dispr->fdset);
99     if (sock > dispr->max_sock) {
100         dispr->max_sock = sock;
101     }
102     DPRINT("Added sock %d for watching. max_sock: %d\n",
103            sock, dispr->max_sock);
104     return 0;
105 }
106 
107 static int
108 dispatcher_remove(Dispatcher *dispr, int sock)
109 {
110     if (sock >= FD_SETSIZE) {
111         fprintf(stderr,
112                 "Error: Failed to remove event. sock %d should be less than %d\n",
113                 sock, FD_SETSIZE);
114         return -1;
115     }
116 
117     FD_CLR(sock, &dispr->fdset);
118     DPRINT("Sock %d removed from dispatcher watch.\n", sock);
119     return 0;
120 }
121 
122 /* timeout in us */
123 static int
124 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
125 {
126     struct timeval tv;
127     tv.tv_sec = timeout / 1000000;
128     tv.tv_usec = timeout % 1000000;
129 
130     fd_set fdset = dispr->fdset;
131 
132     /* wait until some of sockets become readable. */
133     int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
134 
135     if (rc == -1) {
136         vubr_die("select");
137     }
138 
139     /* Timeout */
140     if (rc == 0) {
141         return 0;
142     }
143 
144     /* Now call callback for every ready socket. */
145 
146     int sock;
147     for (sock = 0; sock < dispr->max_sock + 1; sock++) {
148         /* The callback on a socket can remove other sockets from the
149          * dispatcher, thus we have to check that the socket is
150          * still not removed from dispatcher's list
151          */
152         if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
153             Event *e = &dispr->events[sock];
154             e->callback(sock, e->ctx);
155         }
156     }
157 
158     return 0;
159 }
160 
161 static void
162 vubr_handle_tx(VuDev *dev, int qidx)
163 {
164     VuVirtq *vq = vu_get_queue(dev, qidx);
165     VubrDev *vubr = container_of(dev, VubrDev, vudev);
166     int hdrlen = vubr->hdrlen;
167     VuVirtqElement *elem = NULL;
168 
169     assert(qidx % 2);
170 
171     for (;;) {
172         ssize_t ret;
173         unsigned int out_num;
174         struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
175 
176         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
177         if (!elem) {
178             break;
179         }
180 
181         out_num = elem->out_num;
182         out_sg = elem->out_sg;
183         if (out_num < 1) {
184             fprintf(stderr, "virtio-net header not in first element\n");
185             break;
186         }
187         if (VHOST_USER_BRIDGE_DEBUG) {
188             iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
189         }
190 
191         if (hdrlen) {
192             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
193                                        out_sg, out_num,
194                                        hdrlen, -1);
195             out_num = sg_num;
196             out_sg = sg;
197         }
198 
199         struct msghdr msg = {
200             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
201             .msg_namelen = sizeof(struct sockaddr_in),
202             .msg_iov = out_sg,
203             .msg_iovlen = out_num,
204         };
205         do {
206             ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
207         } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
208 
209         if (ret == -1) {
210             vubr_die("sendmsg()");
211         }
212 
213         vu_queue_push(dev, vq, elem, 0);
214         vu_queue_notify(dev, vq);
215 
216         free(elem);
217         elem = NULL;
218     }
219 
220     free(elem);
221 }
222 
223 
/*
 * Undo iov_discard_front().  'front' is the first element of the original
 * array, 'iov' is the element the discard stopped at, and 'bytes' is the
 * number of bytes that were discarded.  The elements in front of 'iov'
 * must account for no more than 'bytes'; whatever remains is given back
 * to 'iov' itself.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *skipped = front;

    /* Subtract the elements that were dropped in their entirety. */
    while (skipped != iov) {
        assert(bytes >= skipped->iov_len);
        bytes -= skipped->iov_len;
        skipped++;
    }

    /* The rest had been cut off the head of the current element. */
    iov->iov_base = (char *)iov->iov_base - bytes;
    iov->iov_len += bytes;
}
241 
/*
 * Shrink the 'iovc'-element vector 'iov' so that its total length is
 * exactly 'bytes': the element containing the cut is shortened and every
 * element after it is set to length 0.
 *
 * 'bytes' may equal the current total length (nothing changes); asking
 * for more than the vector holds is a programming error and asserts.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++) {
        if (bytes < iov[i].iov_len) {
            /* Cut here; bytes == 0 from now on empties the rest. */
            iov[i].iov_len = bytes;
            bytes = 0;
        } else {
            bytes -= iov[i].iov_len;
        }
    }

    assert(bytes == 0 && "couldn't truncate iov");
}
258 
259 static void
260 vubr_backend_recv_cb(int sock, void *ctx)
261 {
262     VubrDev *vubr = (VubrDev *) ctx;
263     VuDev *dev = &vubr->vudev;
264     VuVirtq *vq = vu_get_queue(dev, 0);
265     VuVirtqElement *elem = NULL;
266     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
267     struct virtio_net_hdr_mrg_rxbuf mhdr;
268     unsigned mhdr_cnt = 0;
269     int hdrlen = vubr->hdrlen;
270     int i = 0;
271     struct virtio_net_hdr hdr = {
272         .flags = 0,
273         .gso_type = VIRTIO_NET_HDR_GSO_NONE
274     };
275 
276     DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
277     DPRINT("    hdrlen = %d\n", hdrlen);
278 
279     if (!vu_queue_enabled(dev, vq) ||
280         !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
281         DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
282         return;
283     }
284 
285     do {
286         struct iovec *sg;
287         ssize_t ret, total = 0;
288         unsigned int num;
289 
290         elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
291         if (!elem) {
292             break;
293         }
294 
295         if (elem->in_num < 1) {
296             fprintf(stderr, "virtio-net contains no in buffers\n");
297             break;
298         }
299 
300         sg = elem->in_sg;
301         num = elem->in_num;
302         if (i == 0) {
303             if (hdrlen == 12) {
304                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
305                                     sg, elem->in_num,
306                                     offsetof(typeof(mhdr), num_buffers),
307                                     sizeof(mhdr.num_buffers));
308             }
309             iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
310             total += hdrlen;
311             ret = iov_discard_front(&sg, &num, hdrlen);
312             assert(ret == hdrlen);
313         }
314 
315         struct msghdr msg = {
316             .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
317             .msg_namelen = sizeof(struct sockaddr_in),
318             .msg_iov = sg,
319             .msg_iovlen = elem->in_num,
320             .msg_flags = MSG_DONTWAIT,
321         };
322         do {
323             ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
324         } while (ret == -1 && (errno == EINTR));
325 
326         if (i == 0) {
327             iov_restore_front(elem->in_sg, sg, hdrlen);
328         }
329 
330         if (ret == -1) {
331             if (errno == EWOULDBLOCK) {
332                 vu_queue_rewind(dev, vq, 1);
333                 break;
334             }
335 
336             vubr_die("recvmsg()");
337         }
338 
339         total += ret;
340         iov_truncate(elem->in_sg, elem->in_num, total);
341         vu_queue_fill(dev, vq, elem, total, i++);
342 
343         free(elem);
344         elem = NULL;
345     } while (false); /* could loop if DONTWAIT worked? */
346 
347     if (mhdr_cnt) {
348         mhdr.num_buffers = i;
349         iov_from_buf(mhdr_sg, mhdr_cnt,
350                      0,
351                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
352     }
353 
354     vu_queue_flush(dev, vq, i);
355     vu_queue_notify(dev, vq);
356 
357     free(elem);
358 }
359 
360 static void
361 vubr_receive_cb(int sock, void *ctx)
362 {
363     VubrDev *vubr = (VubrDev *)ctx;
364 
365     if (!vu_dispatch(&vubr->vudev)) {
366         fprintf(stderr, "Error while dispatching\n");
367     }
368 }
369 
370 typedef struct WatchData {
371     VuDev *dev;
372     vu_watch_cb cb;
373     void *data;
374 } WatchData;
375 
376 static void
377 watch_cb(int sock, void *ctx)
378 {
379     struct WatchData *wd = ctx;
380 
381     wd->cb(wd->dev, VU_WATCH_IN, wd->data);
382 }
383 
384 static void
385 vubr_set_watch(VuDev *dev, int fd, int condition,
386                vu_watch_cb cb, void *data)
387 {
388     VubrDev *vubr = container_of(dev, VubrDev, vudev);
389     static WatchData watches[FD_SETSIZE];
390     struct WatchData *wd = &watches[fd];
391 
392     wd->cb = cb;
393     wd->data = data;
394     wd->dev = dev;
395     dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
396 }
397 
398 static void
399 vubr_remove_watch(VuDev *dev, int fd)
400 {
401     VubrDev *vubr = container_of(dev, VubrDev, vudev);
402 
403     dispatcher_remove(&vubr->dispatcher, fd);
404 }
405 
406 static int
407 vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
408 {
409     DPRINT("Function %s() not implemented yet.\n", __func__);
410     return 0;
411 }
412 
413 static int
414 vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
415 {
416     switch (vmsg->request) {
417     case VHOST_USER_SEND_RARP:
418         *do_reply = vubr_send_rarp_exec(dev, vmsg);
419         return 1;
420     default:
421         /* let the library handle the rest */
422         return 0;
423     }
424 
425     return 0;
426 }
427 
428 static void
429 vubr_set_features(VuDev *dev, uint64_t features)
430 {
431     VubrDev *vubr = container_of(dev, VubrDev, vudev);
432 
433     if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
434         (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
435         vubr->hdrlen = 12;
436     } else {
437         vubr->hdrlen = 10;
438     }
439 }
440 
441 static uint64_t
442 vubr_get_features(VuDev *dev)
443 {
444     return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
445         1ULL << VIRTIO_NET_F_MRG_RXBUF;
446 }
447 
448 static void
449 vubr_queue_set_started(VuDev *dev, int qidx, bool started)
450 {
451     VuVirtq *vq = vu_get_queue(dev, qidx);
452 
453     if (qidx % 2 == 1) {
454         vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
455     }
456 }
457 
458 static void
459 vubr_panic(VuDev *dev, const char *msg)
460 {
461     VubrDev *vubr = container_of(dev, VubrDev, vudev);
462 
463     fprintf(stderr, "PANIC: %s\n", msg);
464 
465     dispatcher_remove(&vubr->dispatcher, dev->sock);
466     vubr->quit = 1;
467 }
468 
469 static bool
470 vubr_queue_is_processed_in_order(VuDev *dev, int qidx)
471 {
472     return true;
473 }
474 
475 static const VuDevIface vuiface = {
476     .get_features = vubr_get_features,
477     .set_features = vubr_set_features,
478     .process_msg = vubr_process_msg,
479     .queue_set_started = vubr_queue_set_started,
480     .queue_is_processed_in_order = vubr_queue_is_processed_in_order,
481 };
482 
483 static void
484 vubr_accept_cb(int sock, void *ctx)
485 {
486     VubrDev *dev = (VubrDev *)ctx;
487     int conn_fd;
488     struct sockaddr_un un;
489     socklen_t len = sizeof(un);
490 
491     conn_fd = accept(sock, (struct sockaddr *) &un, &len);
492     if (conn_fd == -1) {
493         vubr_die("accept()");
494     }
495     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
496 
497     vu_init(&dev->vudev,
498             conn_fd,
499             vubr_panic,
500             vubr_set_watch,
501             vubr_remove_watch,
502             &vuiface);
503 
504     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
505     dispatcher_remove(&dev->dispatcher, sock);
506 }
507 
508 static VubrDev *
509 vubr_new(const char *path, bool client)
510 {
511     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
512     struct sockaddr_un un;
513     CallbackFunc cb;
514     size_t len;
515 
516     /* Get a UNIX socket. */
517     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
518     if (dev->sock == -1) {
519         vubr_die("socket");
520     }
521 
522     un.sun_family = AF_UNIX;
523     strcpy(un.sun_path, path);
524     len = sizeof(un.sun_family) + strlen(path);
525 
526     if (!client) {
527         unlink(path);
528 
529         if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
530             vubr_die("bind");
531         }
532 
533         if (listen(dev->sock, 1) == -1) {
534             vubr_die("listen");
535         }
536         cb = vubr_accept_cb;
537 
538         DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
539     } else {
540         if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
541             vubr_die("connect");
542         }
543         vu_init(&dev->vudev,
544                 dev->sock,
545                 vubr_panic,
546                 vubr_set_watch,
547                 vubr_remove_watch,
548                 &vuiface);
549         cb = vubr_receive_cb;
550     }
551 
552     dispatcher_init(&dev->dispatcher);
553 
554     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
555 
556     return dev;
557 }
558 
/*
 * Fill saddr->sin_addr from 'host'.  A string starting with a digit is
 * parsed with inet_aton(); anything else is resolved with
 * gethostbyname() and must yield an IPv4 address.  Any failure prints a
 * message and terminates the process with status 1.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    struct hostent *he;

    /* <ctype.h> functions are undefined for negative char values. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
        return;
    }

    he = gethostbyname(host);
    if (!he || he->h_addrtype != AF_INET ||
        he->h_length != (int)sizeof(saddr->sin_addr) ||
        !he->h_addr_list[0]) {
        fprintf(stderr, "gethostbyname() failed.\n");
        exit(1);
    }

    /* h_addr_list entries are plain char buffers: copy, don't cast. */
    memcpy(&saddr->sin_addr, he->h_addr_list[0], sizeof(saddr->sin_addr));
}
577 
578 static void
579 vubr_backend_udp_setup(VubrDev *dev,
580                        const char *local_host,
581                        const char *local_port,
582                        const char *remote_host,
583                        const char *remote_port)
584 {
585     int sock;
586     const char *r;
587 
588     int lport, rport;
589 
590     lport = strtol(local_port, (char **)&r, 0);
591     if (r == local_port) {
592         fprintf(stderr, "lport parsing failed.\n");
593         exit(1);
594     }
595 
596     rport = strtol(remote_port, (char **)&r, 0);
597     if (r == remote_port) {
598         fprintf(stderr, "rport parsing failed.\n");
599         exit(1);
600     }
601 
602     struct sockaddr_in si_local = {
603         .sin_family = AF_INET,
604         .sin_port = htons(lport),
605     };
606 
607     vubr_set_host(&si_local, local_host);
608 
609     /* setup destination for sends */
610     dev->backend_udp_dest = (struct sockaddr_in) {
611         .sin_family = AF_INET,
612         .sin_port = htons(rport),
613     };
614     vubr_set_host(&dev->backend_udp_dest, remote_host);
615 
616     sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
617     if (sock == -1) {
618         vubr_die("socket");
619     }
620 
621     if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
622         vubr_die("bind");
623     }
624 
625     dev->backend_udp_sock = sock;
626     dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
627     DPRINT("Waiting for data from udp backend on %s:%d...\n",
628            local_host, lport);
629 }
630 
631 static void
632 vubr_run(VubrDev *dev)
633 {
634     while (!dev->quit) {
635         /* timeout 200ms */
636         dispatcher_wait(&dev->dispatcher, 200000);
637         /* Here one can try polling strategy. */
638     }
639 }
640 
/*
 * Split "host:port" at the first ':'.
 *
 * On success *host and *port point to newly allocated copies of the two
 * parts (owned by the caller) and 0 is returned.  Returns -1, leaving
 * *host and *port untouched, when 'buf' contains no ':' or memory cannot
 * be allocated.  'buf' itself is never modified.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *colon = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!colon) {
        return -1;
    }

    host_len = (size_t)(colon - buf);
    port_len = strlen(colon + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, colon + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
654 
/* Built-in settings, used unless overridden on the command line. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() replaces them from the -u, -l and -r options. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
666 
667 int
668 main(int argc, char *argv[])
669 {
670     VubrDev *dev;
671     int opt;
672     bool client = false;
673 
674     while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {
675 
676         switch (opt) {
677         case 'l':
678             if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
679                 goto out;
680             }
681             break;
682         case 'r':
683             if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
684                 goto out;
685             }
686             break;
687         case 'u':
688             ud_socket_path = strdup(optarg);
689             break;
690         case 'c':
691             client = true;
692             break;
693         default:
694             goto out;
695         }
696     }
697 
698     DPRINT("ud socket: %s (%s)\n", ud_socket_path,
699            client ? "client" : "server");
700     DPRINT("local:     %s:%s\n", lhost, lport);
701     DPRINT("remote:    %s:%s\n", rhost, rport);
702 
703     dev = vubr_new(ud_socket_path, client);
704     if (!dev) {
705         return 1;
706     }
707 
708     vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
709     vubr_run(dev);
710 
711     vu_deinit(&dev->vudev);
712 
713     return 0;
714 
715 out:
716     fprintf(stderr, "Usage: %s ", argv[0]);
717     fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
718     fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
719             DEFAULT_UD_SOCKET);
720     fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
721             DEFAULT_LHOST, DEFAULT_LPORT);
722     fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
723             DEFAULT_RHOST, DEFAULT_RPORT);
724     fprintf(stderr, "\t-c client mode\n");
725 
726     return 1;
727 }
728