xref: /openbmc/qemu/net/tap.c (revision 52a7ff52)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "tap_int.h"
28 
29 
30 #include <sys/ioctl.h>
31 #include <sys/wait.h>
32 #include <sys/socket.h>
33 #include <net/if.h>
34 
35 #include "net/eth.h"
36 #include "net/net.h"
37 #include "clients.h"
38 #include "monitor/monitor.h"
39 #include "sysemu/sysemu.h"
40 #include "qapi/error.h"
41 #include "qemu/cutils.h"
42 #include "qemu/error-report.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/sockets.h"
45 
46 #include "net/tap.h"
47 
48 #include "net/vhost_net.h"
49 
/*
 * State of one tap queue.
 *
 * Code in this file recovers a TAPState from a NetClientState pointer with
 * DO_UPCAST(TAPState, nc, ...), so 'nc' is presumably required to remain
 * the first member -- confirm against the DO_UPCAST definition.
 */
typedef struct TAPState {
    NetClientState nc;
    int fd;                       /* tap descriptor; tap_cleanup() sets it to -1 */
    char down_script[1024];       /* script run by tap_exit_notify(); "" = none */
    char down_script_arg[128];    /* argument for down_script (the ifname) */
    uint8_t buf[NET_BUFSIZE];     /* receive buffer filled by tap_send() */
    bool read_poll;               /* read handler wanted */
    bool write_poll;              /* write handler wanted */
    bool using_vnet_hdr;          /* set through tap_using_vnet_hdr() */
    bool has_ufo;                 /* result of tap_probe_has_ufo() */
    bool has_uso;                 /* result of tap_probe_has_uso() */
    bool enabled;                 /* toggled by tap_enable()/tap_disable() */
    VHostNetState *vhost_net;     /* NULL unless vhost was set up */
    unsigned host_vnet_hdr_len;   /* 0 when the tap carries no vnet header */
    Notifier exit;                /* exit notifier, runs tap_exit_notify() */
} TAPState;
66 
/* Defined further down; used by tap_exit_notify() before its definition. */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);

/* fd handlers, referenced by tap_update_fd_handler() before they are defined */
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
72 
73 static void tap_update_fd_handler(TAPState *s)
74 {
75     qemu_set_fd_handler(s->fd,
76                         s->read_poll && s->enabled ? tap_send : NULL,
77                         s->write_poll && s->enabled ? tap_writable : NULL,
78                         s);
79 }
80 
81 static void tap_read_poll(TAPState *s, bool enable)
82 {
83     s->read_poll = enable;
84     tap_update_fd_handler(s);
85 }
86 
87 static void tap_write_poll(TAPState *s, bool enable)
88 {
89     s->write_poll = enable;
90     tap_update_fd_handler(s);
91 }
92 
93 static void tap_writable(void *opaque)
94 {
95     TAPState *s = opaque;
96 
97     tap_write_poll(s, false);
98 
99     qemu_flush_queued_packets(&s->nc);
100 }
101 
102 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
103 {
104     ssize_t len;
105 
106     len = RETRY_ON_EINTR(writev(s->fd, iov, iovcnt));
107 
108     if (len == -1 && errno == EAGAIN) {
109         tap_write_poll(s, true);
110         return 0;
111     }
112 
113     return len;
114 }
115 
116 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
117                                int iovcnt)
118 {
119     TAPState *s = DO_UPCAST(TAPState, nc, nc);
120     const struct iovec *iovp = iov;
121     g_autofree struct iovec *iov_copy = NULL;
122     struct virtio_net_hdr_mrg_rxbuf hdr = { };
123 
124     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
125         iov_copy = g_new(struct iovec, iovcnt + 1);
126         iov_copy[0].iov_base = &hdr;
127         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
128         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
129         iovp = iov_copy;
130         iovcnt++;
131     }
132 
133     return tap_write_packet(s, iovp, iovcnt);
134 }
135 
136 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
137 {
138     TAPState *s = DO_UPCAST(TAPState, nc, nc);
139     struct iovec iov[2];
140     int iovcnt = 0;
141     struct virtio_net_hdr_mrg_rxbuf hdr = { };
142 
143     if (s->host_vnet_hdr_len) {
144         iov[iovcnt].iov_base = &hdr;
145         iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
146         iovcnt++;
147     }
148 
149     iov[iovcnt].iov_base = (char *)buf;
150     iov[iovcnt].iov_len  = size;
151     iovcnt++;
152 
153     return tap_write_packet(s, iov, iovcnt);
154 }
155 
156 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
157 {
158     TAPState *s = DO_UPCAST(TAPState, nc, nc);
159     struct iovec iov[1];
160 
161     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
162         return tap_receive_raw(nc, buf, size);
163     }
164 
165     iov[0].iov_base = (char *)buf;
166     iov[0].iov_len  = size;
167 
168     return tap_write_packet(s, iov, 1);
169 }
170 
171 #ifndef __sun__
172 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
173 {
174     return read(tapfd, buf, maxlen);
175 }
176 #endif
177 
178 static void tap_send_completed(NetClientState *nc, ssize_t len)
179 {
180     TAPState *s = DO_UPCAST(TAPState, nc, nc);
181     tap_read_poll(s, true);
182 }
183 
184 static void tap_send(void *opaque)
185 {
186     TAPState *s = opaque;
187     int size;
188     int packets = 0;
189 
190     while (true) {
191         uint8_t *buf = s->buf;
192         uint8_t min_pkt[ETH_ZLEN];
193         size_t min_pktsz = sizeof(min_pkt);
194 
195         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
196         if (size <= 0) {
197             break;
198         }
199 
200         if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
201             buf  += s->host_vnet_hdr_len;
202             size -= s->host_vnet_hdr_len;
203         }
204 
205         if (net_peer_needs_padding(&s->nc)) {
206             if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
207                 buf = min_pkt;
208                 size = min_pktsz;
209             }
210         }
211 
212         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
213         if (size == 0) {
214             tap_read_poll(s, false);
215             break;
216         } else if (size < 0) {
217             break;
218         }
219 
220         /*
221          * When the host keeps receiving more packets while tap_send() is
222          * running we can hog the BQL.  Limit the number of
223          * packets that are processed per tap_send() callback to prevent
224          * stalling the guest.
225          */
226         packets++;
227         if (packets >= 50) {
228             break;
229         }
230     }
231 }
232 
233 static bool tap_has_ufo(NetClientState *nc)
234 {
235     TAPState *s = DO_UPCAST(TAPState, nc, nc);
236 
237     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
238 
239     return s->has_ufo;
240 }
241 
242 static bool tap_has_uso(NetClientState *nc)
243 {
244     TAPState *s = DO_UPCAST(TAPState, nc, nc);
245 
246     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
247 
248     return s->has_uso;
249 }
250 
251 static bool tap_has_vnet_hdr(NetClientState *nc)
252 {
253     TAPState *s = DO_UPCAST(TAPState, nc, nc);
254 
255     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
256 
257     return !!s->host_vnet_hdr_len;
258 }
259 
260 static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
261 {
262     return tap_has_vnet_hdr(nc);
263 }
264 
265 static int tap_get_vnet_hdr_len(NetClientState *nc)
266 {
267     TAPState *s = DO_UPCAST(TAPState, nc, nc);
268 
269     return s->host_vnet_hdr_len;
270 }
271 
272 static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
273 {
274     TAPState *s = DO_UPCAST(TAPState, nc, nc);
275 
276     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
277     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
278            len == sizeof(struct virtio_net_hdr) ||
279            len == sizeof(struct virtio_net_hdr_v1_hash));
280 
281     tap_fd_set_vnet_hdr_len(s->fd, len);
282     s->host_vnet_hdr_len = len;
283 }
284 
285 static bool tap_get_using_vnet_hdr(NetClientState *nc)
286 {
287     TAPState *s = DO_UPCAST(TAPState, nc, nc);
288 
289     return s->using_vnet_hdr;
290 }
291 
292 static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
293 {
294     TAPState *s = DO_UPCAST(TAPState, nc, nc);
295 
296     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
297     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
298 
299     s->using_vnet_hdr = using_vnet_hdr;
300 }
301 
302 static int tap_set_vnet_le(NetClientState *nc, bool is_le)
303 {
304     TAPState *s = DO_UPCAST(TAPState, nc, nc);
305 
306     return tap_fd_set_vnet_le(s->fd, is_le);
307 }
308 
309 static int tap_set_vnet_be(NetClientState *nc, bool is_be)
310 {
311     TAPState *s = DO_UPCAST(TAPState, nc, nc);
312 
313     return tap_fd_set_vnet_be(s->fd, is_be);
314 }
315 
316 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
317                      int tso6, int ecn, int ufo, int uso4, int uso6)
318 {
319     TAPState *s = DO_UPCAST(TAPState, nc, nc);
320     if (s->fd < 0) {
321         return;
322     }
323 
324     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6);
325 }
326 
327 static void tap_exit_notify(Notifier *notifier, void *data)
328 {
329     TAPState *s = container_of(notifier, TAPState, exit);
330     Error *err = NULL;
331 
332     if (s->down_script[0]) {
333         launch_script(s->down_script, s->down_script_arg, s->fd, &err);
334         if (err) {
335             error_report_err(err);
336         }
337     }
338 }
339 
340 static void tap_cleanup(NetClientState *nc)
341 {
342     TAPState *s = DO_UPCAST(TAPState, nc, nc);
343 
344     if (s->vhost_net) {
345         vhost_net_cleanup(s->vhost_net);
346         g_free(s->vhost_net);
347         s->vhost_net = NULL;
348     }
349 
350     qemu_purge_queued_packets(nc);
351 
352     tap_exit_notify(&s->exit, NULL);
353     qemu_remove_exit_notifier(&s->exit);
354 
355     tap_read_poll(s, false);
356     tap_write_poll(s, false);
357     close(s->fd);
358     s->fd = -1;
359 }
360 
361 static void tap_poll(NetClientState *nc, bool enable)
362 {
363     TAPState *s = DO_UPCAST(TAPState, nc, nc);
364     tap_read_poll(s, enable);
365     tap_write_poll(s, enable);
366 }
367 
368 static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
369 {
370     TAPState *s = DO_UPCAST(TAPState, nc, nc);
371     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
372 
373     return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
374 }
375 
376 int tap_get_fd(NetClientState *nc)
377 {
378     TAPState *s = DO_UPCAST(TAPState, nc, nc);
379     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
380     return s->fd;
381 }
382 
383 /* fd support */
384 
/*
 * Callback table for tap clients.  It is passed to qemu_new_net_client()
 * in net_tap_fd_init(); .size makes the allocated client a full TAPState.
 */
static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_DRIVER_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
    .has_ufo = tap_has_ufo,
    .has_uso = tap_has_uso,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .get_using_vnet_hdr = tap_get_using_vnet_hdr,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .get_vnet_hdr_len = tap_get_vnet_hdr_len,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
    .set_steering_ebpf = tap_set_steering_ebpf,
};
406 
407 static TAPState *net_tap_fd_init(NetClientState *peer,
408                                  const char *model,
409                                  const char *name,
410                                  int fd,
411                                  int vnet_hdr)
412 {
413     NetClientState *nc;
414     TAPState *s;
415 
416     nc = qemu_new_net_client(&net_tap_info, peer, model, name);
417 
418     s = DO_UPCAST(TAPState, nc, nc);
419 
420     s->fd = fd;
421     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
422     s->using_vnet_hdr = false;
423     s->has_ufo = tap_probe_has_ufo(s->fd);
424     s->has_uso = tap_probe_has_uso(s->fd);
425     s->enabled = true;
426     tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0);
427     /*
428      * Make sure host header length is set correctly in tap:
429      * it might have been modified by another instance of qemu.
430      */
431     if (vnet_hdr) {
432         tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
433     }
434     tap_read_poll(s, true);
435     s->vhost_net = NULL;
436 
437     s->exit.notify = tap_exit_notify;
438     qemu_add_exit_notifier(&s->exit);
439 
440     return s;
441 }
442 
443 static void launch_script(const char *setup_script, const char *ifname,
444                           int fd, Error **errp)
445 {
446     int pid, status;
447     char *args[3];
448     char **parg;
449 
450     /* try to launch network script */
451     pid = fork();
452     if (pid < 0) {
453         error_setg_errno(errp, errno, "could not launch network script %s",
454                          setup_script);
455         return;
456     }
457     if (pid == 0) {
458         int open_max = sysconf(_SC_OPEN_MAX), i;
459 
460         for (i = 3; i < open_max; i++) {
461             if (i != fd) {
462                 close(i);
463             }
464         }
465         parg = args;
466         *parg++ = (char *)setup_script;
467         *parg++ = (char *)ifname;
468         *parg = NULL;
469         execv(setup_script, args);
470         _exit(1);
471     } else {
472         while (waitpid(pid, &status, 0) != pid) {
473             /* loop */
474         }
475 
476         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
477             return;
478         }
479         error_setg(errp, "network script %s failed with status %d",
480                    setup_script, status);
481     }
482 }
483 
484 static int recv_fd(int c)
485 {
486     int fd;
487     uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
488     struct msghdr msg = {
489         .msg_control = msgbuf,
490         .msg_controllen = sizeof(msgbuf),
491     };
492     struct cmsghdr *cmsg;
493     struct iovec iov;
494     uint8_t req[1];
495     ssize_t len;
496 
497     cmsg = CMSG_FIRSTHDR(&msg);
498     cmsg->cmsg_level = SOL_SOCKET;
499     cmsg->cmsg_type = SCM_RIGHTS;
500     cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
501     msg.msg_controllen = cmsg->cmsg_len;
502 
503     iov.iov_base = req;
504     iov.iov_len = sizeof(req);
505 
506     msg.msg_iov = &iov;
507     msg.msg_iovlen = 1;
508 
509     len = recvmsg(c, &msg, 0);
510     if (len > 0) {
511         memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
512         return fd;
513     }
514 
515     return len;
516 }
517 
518 static int net_bridge_run_helper(const char *helper, const char *bridge,
519                                  Error **errp)
520 {
521     sigset_t oldmask, mask;
522     g_autofree char *default_helper = NULL;
523     int pid, status;
524     char *args[5];
525     char **parg;
526     int sv[2];
527 
528     sigemptyset(&mask);
529     sigaddset(&mask, SIGCHLD);
530     sigprocmask(SIG_BLOCK, &mask, &oldmask);
531 
532     if (!helper) {
533         helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
534     }
535 
536     if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
537         error_setg_errno(errp, errno, "socketpair() failed");
538         return -1;
539     }
540 
541     /* try to launch bridge helper */
542     pid = fork();
543     if (pid < 0) {
544         error_setg_errno(errp, errno, "Can't fork bridge helper");
545         return -1;
546     }
547     if (pid == 0) {
548         int open_max = sysconf(_SC_OPEN_MAX), i;
549         char *fd_buf = NULL;
550         char *br_buf = NULL;
551         char *helper_cmd = NULL;
552 
553         for (i = 3; i < open_max; i++) {
554             if (i != sv[1]) {
555                 close(i);
556             }
557         }
558 
559         fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
560 
561         if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
562             /* assume helper is a command */
563 
564             if (strstr(helper, "--br=") == NULL) {
565                 br_buf = g_strdup_printf("%s%s", "--br=", bridge);
566             }
567 
568             helper_cmd = g_strdup_printf("%s %s %s %s", helper,
569                             "--use-vnet", fd_buf, br_buf ? br_buf : "");
570 
571             parg = args;
572             *parg++ = (char *)"sh";
573             *parg++ = (char *)"-c";
574             *parg++ = helper_cmd;
575             *parg++ = NULL;
576 
577             execv("/bin/sh", args);
578             g_free(helper_cmd);
579         } else {
580             /* assume helper is just the executable path name */
581 
582             br_buf = g_strdup_printf("%s%s", "--br=", bridge);
583 
584             parg = args;
585             *parg++ = (char *)helper;
586             *parg++ = (char *)"--use-vnet";
587             *parg++ = fd_buf;
588             *parg++ = br_buf;
589             *parg++ = NULL;
590 
591             execv(helper, args);
592         }
593         g_free(fd_buf);
594         g_free(br_buf);
595         _exit(1);
596 
597     } else {
598         int fd;
599         int saved_errno;
600 
601         close(sv[1]);
602 
603         fd = RETRY_ON_EINTR(recv_fd(sv[0]));
604         saved_errno = errno;
605 
606         close(sv[0]);
607 
608         while (waitpid(pid, &status, 0) != pid) {
609             /* loop */
610         }
611         sigprocmask(SIG_SETMASK, &oldmask, NULL);
612         if (fd < 0) {
613             error_setg_errno(errp, saved_errno,
614                              "failed to recv file descriptor");
615             return -1;
616         }
617         if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
618             error_setg(errp, "bridge helper failed");
619             return -1;
620         }
621         return fd;
622     }
623 }
624 
625 int net_init_bridge(const Netdev *netdev, const char *name,
626                     NetClientState *peer, Error **errp)
627 {
628     const NetdevBridgeOptions *bridge;
629     const char *helper, *br;
630     TAPState *s;
631     int fd, vnet_hdr;
632 
633     assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
634     bridge = &netdev->u.bridge;
635     helper = bridge->helper;
636     br     = bridge->br ?: DEFAULT_BRIDGE_INTERFACE;
637 
638     fd = net_bridge_run_helper(helper, br, errp);
639     if (fd == -1) {
640         return -1;
641     }
642 
643     if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
644         error_setg_errno(errp, errno, "Failed to set FD nonblocking");
645         return -1;
646     }
647     vnet_hdr = tap_probe_vnet_hdr(fd, errp);
648     if (vnet_hdr < 0) {
649         close(fd);
650         return -1;
651     }
652     s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
653 
654     qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br);
655 
656     return 0;
657 }
658 
659 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
660                         const char *setup_script, char *ifname,
661                         size_t ifname_sz, int mq_required, Error **errp)
662 {
663     Error *err = NULL;
664     int fd, vnet_hdr_required;
665 
666     if (tap->has_vnet_hdr) {
667         *vnet_hdr = tap->vnet_hdr;
668         vnet_hdr_required = *vnet_hdr;
669     } else {
670         *vnet_hdr = 1;
671         vnet_hdr_required = 0;
672     }
673 
674     fd = RETRY_ON_EINTR(tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
675                       mq_required, errp));
676     if (fd < 0) {
677         return -1;
678     }
679 
680     if (setup_script &&
681         setup_script[0] != '\0' &&
682         strcmp(setup_script, "no") != 0) {
683         launch_script(setup_script, ifname, fd, &err);
684         if (err) {
685             error_propagate(errp, err);
686             close(fd);
687             return -1;
688         }
689     }
690 
691     return fd;
692 }
693 
/* Upper bound on the entries parsed from fds= / vhostfds= in net_init_tap() */
#define MAX_TAP_QUEUES 1024
695 
696 static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
697                              const char *model, const char *name,
698                              const char *ifname, const char *script,
699                              const char *downscript, const char *vhostfdname,
700                              int vnet_hdr, int fd, Error **errp)
701 {
702     Error *err = NULL;
703     TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
704     int vhostfd;
705 
706     tap_set_sndbuf(s->fd, tap, &err);
707     if (err) {
708         error_propagate(errp, err);
709         goto failed;
710     }
711 
712     if (tap->fd || tap->fds) {
713         qemu_set_info_str(&s->nc, "fd=%d", fd);
714     } else if (tap->helper) {
715         qemu_set_info_str(&s->nc, "helper=%s", tap->helper);
716     } else {
717         qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname,
718                           script, downscript);
719 
720         if (strcmp(downscript, "no") != 0) {
721             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
722             snprintf(s->down_script_arg, sizeof(s->down_script_arg),
723                      "%s", ifname);
724         }
725     }
726 
727     if (tap->has_vhost ? tap->vhost :
728         vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
729         VhostNetOptions options;
730 
731         options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
732         options.net_backend = &s->nc;
733         if (tap->has_poll_us) {
734             options.busyloop_timeout = tap->poll_us;
735         } else {
736             options.busyloop_timeout = 0;
737         }
738 
739         if (vhostfdname) {
740             vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
741             if (vhostfd == -1) {
742                 error_propagate(errp, err);
743                 goto failed;
744             }
745             if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
746                 error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
747                                  name, fd);
748                 goto failed;
749             }
750         } else {
751             vhostfd = open("/dev/vhost-net", O_RDWR);
752             if (vhostfd < 0) {
753                 error_setg_errno(errp, errno,
754                                  "tap: open vhost char device failed");
755                 goto failed;
756             }
757             if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
758                 error_setg_errno(errp, errno, "Failed to set FD nonblocking");
759                 goto failed;
760             }
761         }
762         options.opaque = (void *)(uintptr_t)vhostfd;
763         options.nvqs = 2;
764 
765         s->vhost_net = vhost_net_init(&options);
766         if (!s->vhost_net) {
767             error_setg(errp,
768                        "vhost-net requested but could not be initialized");
769             goto failed;
770         }
771     } else if (vhostfdname) {
772         error_setg(errp, "vhostfd(s)= is not valid without vhost");
773         goto failed;
774     }
775 
776     return;
777 
778 failed:
779     qemu_del_net_client(&s->nc);
780 }
781 
/*
 * Split the ':'-separated list @str into newly allocated strings.
 *
 * At most @max entries are stored in @fds; parsing also stops at the end
 * of the string.  Returns the number of entries stored.  The entries are
 * allocated with g_strdup()/g_strndup().
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cursor = str;
    int count = 0;

    while (count < max && cursor < end) {
        char *sep = strchr(cursor, ':');

        if (sep == NULL) {
            /* last entry: take the rest of the string */
            fds[count++] = g_strdup(cursor);
            break;
        }

        fds[count++] = g_strndup(cursor, sep - cursor);
        cursor = sep + 1;
    }

    return count;
}
807 
808 int net_init_tap(const Netdev *netdev, const char *name,
809                  NetClientState *peer, Error **errp)
810 {
811     const NetdevTapOptions *tap;
812     int fd, vnet_hdr = 0, i = 0, queues;
813     /* for the no-fd, no-helper case */
814     const char *script;
815     const char *downscript;
816     Error *err = NULL;
817     const char *vhostfdname;
818     char ifname[128];
819     int ret = 0;
820 
821     assert(netdev->type == NET_CLIENT_DRIVER_TAP);
822     tap = &netdev->u.tap;
823     queues = tap->has_queues ? tap->queues : 1;
824     vhostfdname = tap->vhostfd;
825     script = tap->script;
826     downscript = tap->downscript;
827 
828     /* QEMU hubs do not support multiqueue tap, in this case peer is set.
829      * For -netdev, peer is always NULL. */
830     if (peer && (tap->has_queues || tap->fds || tap->vhostfds)) {
831         error_setg(errp, "Multiqueue tap cannot be used with hubs");
832         return -1;
833     }
834 
835     if (tap->fd) {
836         if (tap->ifname || tap->script || tap->downscript ||
837             tap->has_vnet_hdr || tap->helper || tap->has_queues ||
838             tap->fds || tap->vhostfds) {
839             error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
840                        "helper=, queues=, fds=, and vhostfds= "
841                        "are invalid with fd=");
842             return -1;
843         }
844 
845         fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
846         if (fd == -1) {
847             return -1;
848         }
849 
850         if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
851             error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
852                              name, fd);
853             close(fd);
854             return -1;
855         }
856 
857         vnet_hdr = tap_probe_vnet_hdr(fd, errp);
858         if (vnet_hdr < 0) {
859             close(fd);
860             return -1;
861         }
862 
863         net_init_tap_one(tap, peer, "tap", name, NULL,
864                          script, downscript,
865                          vhostfdname, vnet_hdr, fd, &err);
866         if (err) {
867             error_propagate(errp, err);
868             close(fd);
869             return -1;
870         }
871     } else if (tap->fds) {
872         char **fds;
873         char **vhost_fds;
874         int nfds = 0, nvhosts = 0;
875 
876         if (tap->ifname || tap->script || tap->downscript ||
877             tap->has_vnet_hdr || tap->helper || tap->has_queues ||
878             tap->vhostfd) {
879             error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
880                        "helper=, queues=, and vhostfd= "
881                        "are invalid with fds=");
882             return -1;
883         }
884 
885         fds = g_new0(char *, MAX_TAP_QUEUES);
886         vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
887 
888         nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
889         if (tap->vhostfds) {
890             nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
891             if (nfds != nvhosts) {
892                 error_setg(errp, "The number of fds passed does not match "
893                            "the number of vhostfds passed");
894                 ret = -1;
895                 goto free_fail;
896             }
897         }
898 
899         for (i = 0; i < nfds; i++) {
900             fd = monitor_fd_param(monitor_cur(), fds[i], errp);
901             if (fd == -1) {
902                 ret = -1;
903                 goto free_fail;
904             }
905 
906             ret = g_unix_set_fd_nonblocking(fd, true, NULL);
907             if (!ret) {
908                 error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
909                                  name, fd);
910                 goto free_fail;
911             }
912 
913             if (i == 0) {
914                 vnet_hdr = tap_probe_vnet_hdr(fd, errp);
915                 if (vnet_hdr < 0) {
916                     ret = -1;
917                     goto free_fail;
918                 }
919             } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
920                 error_setg(errp,
921                            "vnet_hdr not consistent across given tap fds");
922                 ret = -1;
923                 goto free_fail;
924             }
925 
926             net_init_tap_one(tap, peer, "tap", name, ifname,
927                              script, downscript,
928                              tap->vhostfds ? vhost_fds[i] : NULL,
929                              vnet_hdr, fd, &err);
930             if (err) {
931                 error_propagate(errp, err);
932                 ret = -1;
933                 goto free_fail;
934             }
935         }
936 
937 free_fail:
938         for (i = 0; i < nvhosts; i++) {
939             g_free(vhost_fds[i]);
940         }
941         for (i = 0; i < nfds; i++) {
942             g_free(fds[i]);
943         }
944         g_free(fds);
945         g_free(vhost_fds);
946         return ret;
947     } else if (tap->helper) {
948         if (tap->ifname || tap->script || tap->downscript ||
949             tap->has_vnet_hdr || tap->has_queues || tap->vhostfds) {
950             error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
951                        "queues=, and vhostfds= are invalid with helper=");
952             return -1;
953         }
954 
955         fd = net_bridge_run_helper(tap->helper,
956                                    tap->br ?: DEFAULT_BRIDGE_INTERFACE,
957                                    errp);
958         if (fd == -1) {
959             return -1;
960         }
961 
962         if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
963             error_setg_errno(errp, errno, "Failed to set FD nonblocking");
964             return -1;
965         }
966         vnet_hdr = tap_probe_vnet_hdr(fd, errp);
967         if (vnet_hdr < 0) {
968             close(fd);
969             return -1;
970         }
971 
972         net_init_tap_one(tap, peer, "bridge", name, ifname,
973                          script, downscript, vhostfdname,
974                          vnet_hdr, fd, &err);
975         if (err) {
976             error_propagate(errp, err);
977             close(fd);
978             return -1;
979         }
980     } else {
981         g_autofree char *default_script = NULL;
982         g_autofree char *default_downscript = NULL;
983         if (tap->vhostfds) {
984             error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
985             return -1;
986         }
987 
988         if (!script) {
989             script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT);
990         }
991         if (!downscript) {
992             downscript = default_downscript =
993                                  get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT);
994         }
995 
996         if (tap->ifname) {
997             pstrcpy(ifname, sizeof ifname, tap->ifname);
998         } else {
999             ifname[0] = '\0';
1000         }
1001 
1002         for (i = 0; i < queues; i++) {
1003             fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
1004                               ifname, sizeof ifname, queues > 1, errp);
1005             if (fd == -1) {
1006                 return -1;
1007             }
1008 
1009             if (queues > 1 && i == 0 && !tap->ifname) {
1010                 if (tap_fd_get_ifname(fd, ifname)) {
1011                     error_setg(errp, "Fail to get ifname");
1012                     close(fd);
1013                     return -1;
1014                 }
1015             }
1016 
1017             net_init_tap_one(tap, peer, "tap", name, ifname,
1018                              i >= 1 ? "no" : script,
1019                              i >= 1 ? "no" : downscript,
1020                              vhostfdname, vnet_hdr, fd, &err);
1021             if (err) {
1022                 error_propagate(errp, err);
1023                 close(fd);
1024                 return -1;
1025             }
1026         }
1027     }
1028 
1029     return 0;
1030 }
1031 
1032 VHostNetState *tap_get_vhost_net(NetClientState *nc)
1033 {
1034     TAPState *s = DO_UPCAST(TAPState, nc, nc);
1035     assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
1036     return s->vhost_net;
1037 }
1038 
1039 int tap_enable(NetClientState *nc)
1040 {
1041     TAPState *s = DO_UPCAST(TAPState, nc, nc);
1042     int ret;
1043 
1044     if (s->enabled) {
1045         return 0;
1046     } else {
1047         ret = tap_fd_enable(s->fd);
1048         if (ret == 0) {
1049             s->enabled = true;
1050             tap_update_fd_handler(s);
1051         }
1052         return ret;
1053     }
1054 }
1055 
1056 int tap_disable(NetClientState *nc)
1057 {
1058     TAPState *s = DO_UPCAST(TAPState, nc, nc);
1059     int ret;
1060 
1061     if (s->enabled == 0) {
1062         return 0;
1063     } else {
1064         ret = tap_fd_disable(s->fd);
1065         if (ret == 0) {
1066             qemu_purge_queued_packets(nc);
1067             s->enabled = false;
1068             tap_update_fd_handler(s);
1069         }
1070         return ret;
1071     }
1072 }
1073