xref: /openbmc/qemu/net/tap.c (revision 63186e56)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "tap.h"
27 
28 #include "config-host.h"
29 
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 
36 #include "net.h"
37 #include "clients.h"
38 #include "monitor.h"
39 #include "sysemu.h"
40 #include "qemu-char.h"
41 #include "qemu-common.h"
42 #include "qemu-error.h"
43 
44 #include "net/tap-linux.h"
45 
46 #include "hw/vhost_net.h"
47 
/* Size of the per-device receive buffer (TAPState.buf): the maximum GSO
 * packet size (64k) plus plenty of room (4k) for the ethernet and
 * virtio_net headers.
 */
#define TAP_BUFSIZE (4096 + 65536)
52 
/* Per-device state of a tap network backend. */
typedef struct TAPState {
    NetClientState nc;              /* generic net client; DO_UPCAST() recovers TAPState from it */
    int fd;                         /* tap file descriptor; set to -1 by tap_cleanup() */
    char down_script[1024];         /* script run by tap_cleanup(); empty string means none */
    char down_script_arg[128];      /* argument handed to down_script (the interface name) */
    uint8_t buf[TAP_BUFSIZE];       /* buffer tap_send() reads host packets into */
    unsigned int read_poll : 1;     /* read handlers are registered for fd */
    unsigned int write_poll : 1;    /* write handler is registered for fd */
    unsigned int using_vnet_hdr : 1; /* peer supplies/consumes the vnet header itself */
    unsigned int has_ufo: 1;        /* result of tap_probe_has_ufo() at init time */
    VHostNetState *vhost_net;       /* vhost-net state, NULL when vhost is not in use */
    unsigned host_vnet_hdr_len;     /* length of the vnet header on the tap fd, 0 if none */
} TAPState;
66 
/* Forward declaration: used by tap_cleanup() before its definition. */
static int launch_script(const char *setup_script, const char *ifname, int fd);

/* Forward declarations of the fd callbacks installed by
 * tap_update_fd_handler().
 */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
72 
73 static void tap_update_fd_handler(TAPState *s)
74 {
75     qemu_set_fd_handler2(s->fd,
76                          s->read_poll  ? tap_can_send : NULL,
77                          s->read_poll  ? tap_send     : NULL,
78                          s->write_poll ? tap_writable : NULL,
79                          s);
80 }
81 
82 static void tap_read_poll(TAPState *s, int enable)
83 {
84     s->read_poll = !!enable;
85     tap_update_fd_handler(s);
86 }
87 
88 static void tap_write_poll(TAPState *s, int enable)
89 {
90     s->write_poll = !!enable;
91     tap_update_fd_handler(s);
92 }
93 
94 static void tap_writable(void *opaque)
95 {
96     TAPState *s = opaque;
97 
98     tap_write_poll(s, 0);
99 
100     qemu_flush_queued_packets(&s->nc);
101 }
102 
103 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
104 {
105     ssize_t len;
106 
107     do {
108         len = writev(s->fd, iov, iovcnt);
109     } while (len == -1 && errno == EINTR);
110 
111     if (len == -1 && errno == EAGAIN) {
112         tap_write_poll(s, 1);
113         return 0;
114     }
115 
116     return len;
117 }
118 
119 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
120                                int iovcnt)
121 {
122     TAPState *s = DO_UPCAST(TAPState, nc, nc);
123     const struct iovec *iovp = iov;
124     struct iovec iov_copy[iovcnt + 1];
125     struct virtio_net_hdr_mrg_rxbuf hdr = { };
126 
127     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
128         iov_copy[0].iov_base = &hdr;
129         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
130         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
131         iovp = iov_copy;
132         iovcnt++;
133     }
134 
135     return tap_write_packet(s, iovp, iovcnt);
136 }
137 
138 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
139 {
140     TAPState *s = DO_UPCAST(TAPState, nc, nc);
141     struct iovec iov[2];
142     int iovcnt = 0;
143     struct virtio_net_hdr_mrg_rxbuf hdr = { };
144 
145     if (s->host_vnet_hdr_len) {
146         iov[iovcnt].iov_base = &hdr;
147         iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
148         iovcnt++;
149     }
150 
151     iov[iovcnt].iov_base = (char *)buf;
152     iov[iovcnt].iov_len  = size;
153     iovcnt++;
154 
155     return tap_write_packet(s, iov, iovcnt);
156 }
157 
158 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
159 {
160     TAPState *s = DO_UPCAST(TAPState, nc, nc);
161     struct iovec iov[1];
162 
163     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
164         return tap_receive_raw(nc, buf, size);
165     }
166 
167     iov[0].iov_base = (char *)buf;
168     iov[0].iov_len  = size;
169 
170     return tap_write_packet(s, iov, 1);
171 }
172 
173 static int tap_can_send(void *opaque)
174 {
175     TAPState *s = opaque;
176 
177     return qemu_can_send_packet(&s->nc);
178 }
179 
#ifndef __sun__
/*
 * Read one packet from tapfd into buf (at most maxlen bytes).
 * Returns the read() result unchanged.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
186 
187 static void tap_send_completed(NetClientState *nc, ssize_t len)
188 {
189     TAPState *s = DO_UPCAST(TAPState, nc, nc);
190     tap_read_poll(s, 1);
191 }
192 
193 static void tap_send(void *opaque)
194 {
195     TAPState *s = opaque;
196     int size;
197 
198     do {
199         uint8_t *buf = s->buf;
200 
201         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
202         if (size <= 0) {
203             break;
204         }
205 
206         if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
207             buf  += s->host_vnet_hdr_len;
208             size -= s->host_vnet_hdr_len;
209         }
210 
211         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
212         if (size == 0) {
213             tap_read_poll(s, 0);
214         }
215     } while (size > 0 && qemu_can_send_packet(&s->nc));
216 }
217 
218 int tap_has_ufo(NetClientState *nc)
219 {
220     TAPState *s = DO_UPCAST(TAPState, nc, nc);
221 
222     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
223 
224     return s->has_ufo;
225 }
226 
227 int tap_has_vnet_hdr(NetClientState *nc)
228 {
229     TAPState *s = DO_UPCAST(TAPState, nc, nc);
230 
231     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
232 
233     return !!s->host_vnet_hdr_len;
234 }
235 
236 int tap_has_vnet_hdr_len(NetClientState *nc, int len)
237 {
238     TAPState *s = DO_UPCAST(TAPState, nc, nc);
239 
240     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
241 
242     return tap_probe_vnet_hdr_len(s->fd, len);
243 }
244 
245 void tap_set_vnet_hdr_len(NetClientState *nc, int len)
246 {
247     TAPState *s = DO_UPCAST(TAPState, nc, nc);
248 
249     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
250     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
251            len == sizeof(struct virtio_net_hdr));
252 
253     tap_fd_set_vnet_hdr_len(s->fd, len);
254     s->host_vnet_hdr_len = len;
255 }
256 
257 void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr)
258 {
259     TAPState *s = DO_UPCAST(TAPState, nc, nc);
260 
261     using_vnet_hdr = using_vnet_hdr != 0;
262 
263     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
264     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
265 
266     s->using_vnet_hdr = using_vnet_hdr;
267 }
268 
269 void tap_set_offload(NetClientState *nc, int csum, int tso4,
270                      int tso6, int ecn, int ufo)
271 {
272     TAPState *s = DO_UPCAST(TAPState, nc, nc);
273     if (s->fd < 0) {
274         return;
275     }
276 
277     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
278 }
279 
280 static void tap_cleanup(NetClientState *nc)
281 {
282     TAPState *s = DO_UPCAST(TAPState, nc, nc);
283 
284     if (s->vhost_net) {
285         vhost_net_cleanup(s->vhost_net);
286         s->vhost_net = NULL;
287     }
288 
289     qemu_purge_queued_packets(nc);
290 
291     if (s->down_script[0])
292         launch_script(s->down_script, s->down_script_arg, s->fd);
293 
294     tap_read_poll(s, 0);
295     tap_write_poll(s, 0);
296     close(s->fd);
297     s->fd = -1;
298 }
299 
300 static void tap_poll(NetClientState *nc, bool enable)
301 {
302     TAPState *s = DO_UPCAST(TAPState, nc, nc);
303     tap_read_poll(s, enable);
304     tap_write_poll(s, enable);
305 }
306 
307 int tap_get_fd(NetClientState *nc)
308 {
309     TAPState *s = DO_UPCAST(TAPState, nc, nc);
310     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
311     return s->fd;
312 }
313 
314 /* fd support */
315 
316 static NetClientInfo net_tap_info = {
317     .type = NET_CLIENT_OPTIONS_KIND_TAP,
318     .size = sizeof(TAPState),
319     .receive = tap_receive,
320     .receive_raw = tap_receive_raw,
321     .receive_iov = tap_receive_iov,
322     .poll = tap_poll,
323     .cleanup = tap_cleanup,
324 };
325 
326 static TAPState *net_tap_fd_init(NetClientState *peer,
327                                  const char *model,
328                                  const char *name,
329                                  int fd,
330                                  int vnet_hdr)
331 {
332     NetClientState *nc;
333     TAPState *s;
334 
335     nc = qemu_new_net_client(&net_tap_info, peer, model, name);
336 
337     s = DO_UPCAST(TAPState, nc, nc);
338 
339     s->fd = fd;
340     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
341     s->using_vnet_hdr = 0;
342     s->has_ufo = tap_probe_has_ufo(s->fd);
343     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
344     tap_read_poll(s, 1);
345     s->vhost_net = NULL;
346     return s;
347 }
348 
/*
 * Run setup_script with ifname as its only argument and wait for it.
 *
 * The child closes every descriptor except stdin, stdout, stderr and fd
 * before exec'ing the script.
 *
 * Returns 0 if the script exited with status 0.  Otherwise an error is
 * printed to stderr and -1 is returned (this includes fork() failure).
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    int child, status;

    /* try to launch network script */
    child = fork();
    if (child == 0) {
        char *argv[3] = { (char *)setup_script, (char *)ifname, NULL };
        int open_max = sysconf(_SC_OPEN_MAX);
        int i;

        for (i = 0; i < open_max; i++) {
            if (i == STDIN_FILENO || i == STDOUT_FILENO ||
                i == STDERR_FILENO || i == fd) {
                continue;
            }
            close(i);
        }

        execv(setup_script, argv);
        /* only reached when exec failed */
        _exit(1);
    }

    if (child > 0) {
        while (waitpid(child, &status, 0) != child) {
            /* loop */
        }

        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
            return 0;
        }
    }

    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
386 
/*
 * Receive one file descriptor passed as SCM_RIGHTS ancillary data over
 * the UNIX socket c, together with one byte of ordinary data.
 *
 * Returns the received descriptor, or -1 on failure:
 *   - recvmsg() failed (errno is left as set by recvmsg(), so the caller
 *     may retry on EINTR);
 *   - the peer closed the socket without sending anything (errno = EIO);
 *   - data arrived without a well-formed SCM_RIGHTS message carrying
 *     exactly one descriptor (errno = EINVAL).
 */
static int recv_fd(int c)
{
    int fd;
    /* The union guarantees the alignment struct cmsghdr requires. */
    union {
        struct cmsghdr align;
        uint8_t buf[CMSG_SPACE(sizeof(fd))];
    } control;
    struct msghdr msg;
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(&control, 0, sizeof(control));
    memset(&msg, 0, sizeof(msg));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* EOF: must not be reported as "descriptor 0". */
        errno = EIO;
        return -1;
    }

    /* Only trust the control buffer if the kernel really filled it in. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len != CMSG_LEN(sizeof(fd))) {
        errno = EINVAL;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}
420 
421 static int net_bridge_run_helper(const char *helper, const char *bridge)
422 {
423     sigset_t oldmask, mask;
424     int pid, status;
425     char *args[5];
426     char **parg;
427     int sv[2];
428 
429     sigemptyset(&mask);
430     sigaddset(&mask, SIGCHLD);
431     sigprocmask(SIG_BLOCK, &mask, &oldmask);
432 
433     if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
434         return -1;
435     }
436 
437     /* try to launch bridge helper */
438     pid = fork();
439     if (pid == 0) {
440         int open_max = sysconf(_SC_OPEN_MAX), i;
441         char fd_buf[6+10];
442         char br_buf[6+IFNAMSIZ] = {0};
443         char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];
444 
445         for (i = 0; i < open_max; i++) {
446             if (i != STDIN_FILENO &&
447                 i != STDOUT_FILENO &&
448                 i != STDERR_FILENO &&
449                 i != sv[1]) {
450                 close(i);
451             }
452         }
453 
454         snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);
455 
456         if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
457             /* assume helper is a command */
458 
459             if (strstr(helper, "--br=") == NULL) {
460                 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
461             }
462 
463             snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
464                      helper, "--use-vnet", fd_buf, br_buf);
465 
466             parg = args;
467             *parg++ = (char *)"sh";
468             *parg++ = (char *)"-c";
469             *parg++ = helper_cmd;
470             *parg++ = NULL;
471 
472             execv("/bin/sh", args);
473         } else {
474             /* assume helper is just the executable path name */
475 
476             snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
477 
478             parg = args;
479             *parg++ = (char *)helper;
480             *parg++ = (char *)"--use-vnet";
481             *parg++ = fd_buf;
482             *parg++ = br_buf;
483             *parg++ = NULL;
484 
485             execv(helper, args);
486         }
487         _exit(1);
488 
489     } else if (pid > 0) {
490         int fd;
491 
492         close(sv[1]);
493 
494         do {
495             fd = recv_fd(sv[0]);
496         } while (fd == -1 && errno == EINTR);
497 
498         close(sv[0]);
499 
500         while (waitpid(pid, &status, 0) != pid) {
501             /* loop */
502         }
503         sigprocmask(SIG_SETMASK, &oldmask, NULL);
504         if (fd < 0) {
505             fprintf(stderr, "failed to recv file descriptor\n");
506             return -1;
507         }
508 
509         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
510             return fd;
511         }
512     }
513     fprintf(stderr, "failed to launch bridge helper\n");
514     return -1;
515 }
516 
517 int net_init_bridge(const NetClientOptions *opts, const char *name,
518                     NetClientState *peer)
519 {
520     const NetdevBridgeOptions *bridge;
521     const char *helper, *br;
522 
523     TAPState *s;
524     int fd, vnet_hdr;
525 
526     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE);
527     bridge = opts->bridge;
528 
529     helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
530     br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
531 
532     fd = net_bridge_run_helper(helper, br);
533     if (fd == -1) {
534         return -1;
535     }
536 
537     fcntl(fd, F_SETFL, O_NONBLOCK);
538 
539     vnet_hdr = tap_probe_vnet_hdr(fd);
540 
541     s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
542     if (!s) {
543         close(fd);
544         return -1;
545     }
546 
547     snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
548              br);
549 
550     return 0;
551 }
552 
553 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
554                         const char *setup_script, char *ifname,
555                         size_t ifname_sz)
556 {
557     int fd, vnet_hdr_required;
558 
559     if (tap->has_ifname) {
560         pstrcpy(ifname, ifname_sz, tap->ifname);
561     } else {
562         assert(ifname_sz > 0);
563         ifname[0] = '\0';
564     }
565 
566     if (tap->has_vnet_hdr) {
567         *vnet_hdr = tap->vnet_hdr;
568         vnet_hdr_required = *vnet_hdr;
569     } else {
570         *vnet_hdr = 1;
571         vnet_hdr_required = 0;
572     }
573 
574     TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required));
575     if (fd < 0) {
576         return -1;
577     }
578 
579     if (setup_script &&
580         setup_script[0] != '\0' &&
581         strcmp(setup_script, "no") != 0 &&
582         launch_script(setup_script, ifname, fd)) {
583         close(fd);
584         return -1;
585     }
586 
587     return fd;
588 }
589 
590 int net_init_tap(const NetClientOptions *opts, const char *name,
591                  NetClientState *peer)
592 {
593     const NetdevTapOptions *tap;
594 
595     int fd, vnet_hdr = 0;
596     const char *model;
597     TAPState *s;
598 
599     /* for the no-fd, no-helper case */
600     const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
601     char ifname[128];
602 
603     assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
604     tap = opts->tap;
605 
606     if (tap->has_fd) {
607         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
608             tap->has_vnet_hdr || tap->has_helper) {
609             error_report("ifname=, script=, downscript=, vnet_hdr=, "
610                          "and helper= are invalid with fd=");
611             return -1;
612         }
613 
614         fd = monitor_handle_fd_param(cur_mon, tap->fd);
615         if (fd == -1) {
616             return -1;
617         }
618 
619         fcntl(fd, F_SETFL, O_NONBLOCK);
620 
621         vnet_hdr = tap_probe_vnet_hdr(fd);
622 
623         model = "tap";
624 
625     } else if (tap->has_helper) {
626         if (tap->has_ifname || tap->has_script || tap->has_downscript ||
627             tap->has_vnet_hdr) {
628             error_report("ifname=, script=, downscript=, and vnet_hdr= "
629                          "are invalid with helper=");
630             return -1;
631         }
632 
633         fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE);
634         if (fd == -1) {
635             return -1;
636         }
637 
638         fcntl(fd, F_SETFL, O_NONBLOCK);
639 
640         vnet_hdr = tap_probe_vnet_hdr(fd);
641 
642         model = "bridge";
643 
644     } else {
645         script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
646         fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname);
647         if (fd == -1) {
648             return -1;
649         }
650 
651         model = "tap";
652     }
653 
654     s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
655     if (!s) {
656         close(fd);
657         return -1;
658     }
659 
660     if (tap_set_sndbuf(s->fd, tap) < 0) {
661         return -1;
662     }
663 
664     if (tap->has_fd) {
665         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
666     } else if (tap->has_helper) {
667         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
668                  tap->helper);
669     } else {
670         const char *downscript;
671 
672         downscript = tap->has_downscript ? tap->downscript :
673                                            DEFAULT_NETWORK_DOWN_SCRIPT;
674 
675         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
676                  "ifname=%s,script=%s,downscript=%s", ifname, script,
677                  downscript);
678 
679         if (strcmp(downscript, "no") != 0) {
680             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
681             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
682         }
683     }
684 
685     if (tap->has_vhost ? tap->vhost :
686         tap->has_vhostfd || (tap->has_vhostforce && tap->vhostforce)) {
687         int vhostfd;
688 
689         if (tap->has_vhostfd) {
690             vhostfd = monitor_handle_fd_param(cur_mon, tap->vhostfd);
691             if (vhostfd == -1) {
692                 return -1;
693             }
694         } else {
695             vhostfd = -1;
696         }
697 
698         s->vhost_net = vhost_net_init(&s->nc, vhostfd,
699                                       tap->has_vhostforce && tap->vhostforce);
700         if (!s->vhost_net) {
701             error_report("vhost-net requested but could not be initialized");
702             return -1;
703         }
704     } else if (tap->has_vhostfd) {
705         error_report("vhostfd= is not valid without vhost");
706         return -1;
707     }
708 
709     return 0;
710 }
711 
712 VHostNetState *tap_get_vhost_net(NetClientState *nc)
713 {
714     TAPState *s = DO_UPCAST(TAPState, nc, nc);
715     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
716     return s->vhost_net;
717 }
718