xref: /openbmc/qemu/net/tap.c (revision b6828931ebac027b869e40ec9518a291078dafe5)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "net/tap.h"
27 
28 #include "config-host.h"
29 
30 #include <signal.h>
31 #include <sys/ioctl.h>
32 #include <sys/stat.h>
33 #include <sys/wait.h>
34 #include <sys/socket.h>
35 #include <net/if.h>
36 
37 #include "net.h"
38 #include "sysemu.h"
39 #include "qemu-char.h"
40 #include "qemu-common.h"
41 #include "qemu-error.h"
42 
43 #include "net/tap-linux.h"
44 
45 #include "hw/vhost_net.h"
46 
/* Size in bytes of the packet buffer embedded in each TAPState:
 * the maximum GSO packet size (64k) plus plenty of room for
 * the ethernet and virtio_net headers.
 */
#define TAP_BUFSIZE (4096 + 65536)
51 
/* Per-client state of a TAP network backend. */
typedef struct TAPState {
    /* Embedded generic client; DO_UPCAST() recovers the TAPState from it. */
    VLANClientState nc;
    /* File descriptor packets are read from and written to. */
    int fd;
    /* Script run by tap_cleanup(); an empty string means "none". */
    char down_script[1024];
    /* Argument handed to down_script (the interface name). */
    char down_script_arg[128];
    /* Buffer tap_send() reads each incoming packet into. */
    uint8_t buf[TAP_BUFSIZE];
    /* Set while the read handlers are installed on fd. */
    unsigned int read_poll : 1;
    /* Set while the write handler is installed on fd. */
    unsigned int write_poll : 1;
    /* Set when packets on fd are prefixed by a struct virtio_net_hdr. */
    unsigned int has_vnet_hdr : 1;
    /* Set via tap_using_vnet_hdr(); when clear, this file adds/strips the
     * header itself. */
    unsigned int using_vnet_hdr : 1;
    /* Result of tap_probe_has_ufo() on fd. */
    unsigned int has_ufo: 1;
    /* vhost-net state, or NULL when vhost is not in use. */
    VHostNetState *vhost_net;
} TAPState;
65 
/* Forward declaration: used by tap_cleanup() before its definition. */
static int launch_script(const char *setup_script, const char *ifname, int fd);

/* Forward declarations: fd handlers installed by tap_update_fd_handler(). */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
71 
72 static void tap_update_fd_handler(TAPState *s)
73 {
74     qemu_set_fd_handler2(s->fd,
75                          s->read_poll  ? tap_can_send : NULL,
76                          s->read_poll  ? tap_send     : NULL,
77                          s->write_poll ? tap_writable : NULL,
78                          s);
79 }
80 
81 static void tap_read_poll(TAPState *s, int enable)
82 {
83     s->read_poll = !!enable;
84     tap_update_fd_handler(s);
85 }
86 
87 static void tap_write_poll(TAPState *s, int enable)
88 {
89     s->write_poll = !!enable;
90     tap_update_fd_handler(s);
91 }
92 
93 static void tap_writable(void *opaque)
94 {
95     TAPState *s = opaque;
96 
97     tap_write_poll(s, 0);
98 
99     qemu_flush_queued_packets(&s->nc);
100 }
101 
102 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
103 {
104     ssize_t len;
105 
106     do {
107         len = writev(s->fd, iov, iovcnt);
108     } while (len == -1 && errno == EINTR);
109 
110     if (len == -1 && errno == EAGAIN) {
111         tap_write_poll(s, 1);
112         return 0;
113     }
114 
115     return len;
116 }
117 
118 static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
119                                int iovcnt)
120 {
121     TAPState *s = DO_UPCAST(TAPState, nc, nc);
122     const struct iovec *iovp = iov;
123     struct iovec iov_copy[iovcnt + 1];
124     struct virtio_net_hdr hdr = { 0, };
125 
126     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
127         iov_copy[0].iov_base = &hdr;
128         iov_copy[0].iov_len =  sizeof(hdr);
129         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
130         iovp = iov_copy;
131         iovcnt++;
132     }
133 
134     return tap_write_packet(s, iovp, iovcnt);
135 }
136 
137 static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
138 {
139     TAPState *s = DO_UPCAST(TAPState, nc, nc);
140     struct iovec iov[2];
141     int iovcnt = 0;
142     struct virtio_net_hdr hdr = { 0, };
143 
144     if (s->has_vnet_hdr) {
145         iov[iovcnt].iov_base = &hdr;
146         iov[iovcnt].iov_len  = sizeof(hdr);
147         iovcnt++;
148     }
149 
150     iov[iovcnt].iov_base = (char *)buf;
151     iov[iovcnt].iov_len  = size;
152     iovcnt++;
153 
154     return tap_write_packet(s, iov, iovcnt);
155 }
156 
157 static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
158 {
159     TAPState *s = DO_UPCAST(TAPState, nc, nc);
160     struct iovec iov[1];
161 
162     if (s->has_vnet_hdr && !s->using_vnet_hdr) {
163         return tap_receive_raw(nc, buf, size);
164     }
165 
166     iov[0].iov_base = (char *)buf;
167     iov[0].iov_len  = size;
168 
169     return tap_write_packet(s, iov, 1);
170 }
171 
172 static int tap_can_send(void *opaque)
173 {
174     TAPState *s = opaque;
175 
176     return qemu_can_send_packet(&s->nc);
177 }
178 
#ifndef __sun__
/*
 * Read up to maxlen bytes from tapfd into buf with a single read() call
 * and return its result unchanged.  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
185 
186 static void tap_send_completed(VLANClientState *nc, ssize_t len)
187 {
188     TAPState *s = DO_UPCAST(TAPState, nc, nc);
189     tap_read_poll(s, 1);
190 }
191 
192 static void tap_send(void *opaque)
193 {
194     TAPState *s = opaque;
195     int size;
196 
197     do {
198         uint8_t *buf = s->buf;
199 
200         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
201         if (size <= 0) {
202             break;
203         }
204 
205         if (s->has_vnet_hdr && !s->using_vnet_hdr) {
206             buf  += sizeof(struct virtio_net_hdr);
207             size -= sizeof(struct virtio_net_hdr);
208         }
209 
210         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
211         if (size == 0) {
212             tap_read_poll(s, 0);
213         }
214     } while (size > 0 && qemu_can_send_packet(&s->nc));
215 }
216 
217 int tap_has_ufo(VLANClientState *nc)
218 {
219     TAPState *s = DO_UPCAST(TAPState, nc, nc);
220 
221     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
222 
223     return s->has_ufo;
224 }
225 
226 int tap_has_vnet_hdr(VLANClientState *nc)
227 {
228     TAPState *s = DO_UPCAST(TAPState, nc, nc);
229 
230     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
231 
232     return s->has_vnet_hdr;
233 }
234 
235 void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
236 {
237     TAPState *s = DO_UPCAST(TAPState, nc, nc);
238 
239     using_vnet_hdr = using_vnet_hdr != 0;
240 
241     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
242     assert(s->has_vnet_hdr == using_vnet_hdr);
243 
244     s->using_vnet_hdr = using_vnet_hdr;
245 }
246 
247 void tap_set_offload(VLANClientState *nc, int csum, int tso4,
248                      int tso6, int ecn, int ufo)
249 {
250     TAPState *s = DO_UPCAST(TAPState, nc, nc);
251 
252     return tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
253 }
254 
255 static void tap_cleanup(VLANClientState *nc)
256 {
257     TAPState *s = DO_UPCAST(TAPState, nc, nc);
258 
259     if (s->vhost_net) {
260         vhost_net_cleanup(s->vhost_net);
261     }
262 
263     qemu_purge_queued_packets(nc);
264 
265     if (s->down_script[0])
266         launch_script(s->down_script, s->down_script_arg, s->fd);
267 
268     tap_read_poll(s, 0);
269     tap_write_poll(s, 0);
270     close(s->fd);
271 }
272 
273 static void tap_poll(VLANClientState *nc, bool enable)
274 {
275     TAPState *s = DO_UPCAST(TAPState, nc, nc);
276     tap_read_poll(s, enable);
277     tap_write_poll(s, enable);
278 }
279 
280 int tap_get_fd(VLANClientState *nc)
281 {
282     TAPState *s = DO_UPCAST(TAPState, nc, nc);
283     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
284     return s->fd;
285 }
286 
/* fd support */

/*
 * Client descriptor for TAP backends: ties the TAP client type and the
 * TAPState allocation size to the receive, poll and cleanup callbacks
 * defined in this file.  Passed to qemu_new_net_client() by
 * net_tap_fd_init().
 */
static NetClientInfo net_tap_info = {
    .type = NET_CLIENT_TYPE_TAP,
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
    .poll = tap_poll,
    .cleanup = tap_cleanup,
};
298 
299 static TAPState *net_tap_fd_init(VLANState *vlan,
300                                  const char *model,
301                                  const char *name,
302                                  int fd,
303                                  int vnet_hdr)
304 {
305     VLANClientState *nc;
306     TAPState *s;
307 
308     nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
309 
310     s = DO_UPCAST(TAPState, nc, nc);
311 
312     s->fd = fd;
313     s->has_vnet_hdr = vnet_hdr != 0;
314     s->using_vnet_hdr = 0;
315     s->has_ufo = tap_probe_has_ufo(s->fd);
316     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
317     tap_read_poll(s, 1);
318     s->vhost_net = NULL;
319     return s;
320 }
321 
322 static int launch_script(const char *setup_script, const char *ifname, int fd)
323 {
324     sigset_t oldmask, mask;
325     int pid, status;
326     char *args[3];
327     char **parg;
328 
329     sigemptyset(&mask);
330     sigaddset(&mask, SIGCHLD);
331     sigprocmask(SIG_BLOCK, &mask, &oldmask);
332 
333     /* try to launch network script */
334     pid = fork();
335     if (pid == 0) {
336         int open_max = sysconf(_SC_OPEN_MAX), i;
337 
338         for (i = 0; i < open_max; i++) {
339             if (i != STDIN_FILENO &&
340                 i != STDOUT_FILENO &&
341                 i != STDERR_FILENO &&
342                 i != fd) {
343                 close(i);
344             }
345         }
346         parg = args;
347         *parg++ = (char *)setup_script;
348         *parg++ = (char *)ifname;
349         *parg = NULL;
350         execv(setup_script, args);
351         _exit(1);
352     } else if (pid > 0) {
353         while (waitpid(pid, &status, 0) != pid) {
354             /* loop */
355         }
356         sigprocmask(SIG_SETMASK, &oldmask, NULL);
357 
358         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
359             return 0;
360         }
361     }
362     fprintf(stderr, "%s: could not launch network script\n", setup_script);
363     return -1;
364 }
365 
366 static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
367 {
368     int fd, vnet_hdr_required;
369     char ifname[128] = {0,};
370     const char *setup_script;
371 
372     if (qemu_opt_get(opts, "ifname")) {
373         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
374     }
375 
376     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
377     if (qemu_opt_get(opts, "vnet_hdr")) {
378         vnet_hdr_required = *vnet_hdr;
379     } else {
380         vnet_hdr_required = 0;
381     }
382 
383     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
384     if (fd < 0) {
385         return -1;
386     }
387 
388     setup_script = qemu_opt_get(opts, "script");
389     if (setup_script &&
390         setup_script[0] != '\0' &&
391         strcmp(setup_script, "no") != 0 &&
392         launch_script(setup_script, ifname, fd)) {
393         close(fd);
394         return -1;
395     }
396 
397     qemu_opt_set(opts, "ifname", ifname);
398 
399     return fd;
400 }
401 
402 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
403 {
404     TAPState *s;
405     int fd, vnet_hdr = 0;
406 
407     if (qemu_opt_get(opts, "fd")) {
408         if (qemu_opt_get(opts, "ifname") ||
409             qemu_opt_get(opts, "script") ||
410             qemu_opt_get(opts, "downscript") ||
411             qemu_opt_get(opts, "vnet_hdr")) {
412             error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
413             return -1;
414         }
415 
416         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
417         if (fd == -1) {
418             return -1;
419         }
420 
421         fcntl(fd, F_SETFL, O_NONBLOCK);
422 
423         vnet_hdr = tap_probe_vnet_hdr(fd);
424     } else {
425         if (!qemu_opt_get(opts, "script")) {
426             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
427         }
428 
429         if (!qemu_opt_get(opts, "downscript")) {
430             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
431         }
432 
433         fd = net_tap_init(opts, &vnet_hdr);
434         if (fd == -1) {
435             return -1;
436         }
437     }
438 
439     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
440     if (!s) {
441         close(fd);
442         return -1;
443     }
444 
445     if (tap_set_sndbuf(s->fd, opts) < 0) {
446         return -1;
447     }
448 
449     if (qemu_opt_get(opts, "fd")) {
450         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
451     } else {
452         const char *ifname, *script, *downscript;
453 
454         ifname     = qemu_opt_get(opts, "ifname");
455         script     = qemu_opt_get(opts, "script");
456         downscript = qemu_opt_get(opts, "downscript");
457 
458         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
459                  "ifname=%s,script=%s,downscript=%s",
460                  ifname, script, downscript);
461 
462         if (strcmp(downscript, "no") != 0) {
463             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
464             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
465         }
466     }
467 
468     if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd"))) {
469         int vhostfd, r;
470         if (qemu_opt_get(opts, "vhostfd")) {
471             r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
472             if (r == -1) {
473                 return -1;
474             }
475             vhostfd = r;
476         } else {
477             vhostfd = -1;
478         }
479         s->vhost_net = vhost_net_init(&s->nc, vhostfd);
480         if (!s->vhost_net) {
481             error_report("vhost-net requested but could not be initialized");
482             return -1;
483         }
484     } else if (qemu_opt_get(opts, "vhostfd")) {
485         error_report("vhostfd= is not valid without vhost");
486         return -1;
487     }
488 
489     return 0;
490 }
491 
492 VHostNetState *tap_get_vhost_net(VLANClientState *nc)
493 {
494     TAPState *s = DO_UPCAST(TAPState, nc, nc);
495     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
496     return s->vhost_net;
497 }
498