xref: /openbmc/qemu/net/tap.c (revision 51b24e34)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "net/tap.h"
27 
28 #include "config-host.h"
29 
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 
36 #include "net.h"
37 #include "sysemu.h"
38 #include "qemu-char.h"
39 #include "qemu-common.h"
40 #include "qemu-error.h"
41 
42 #include "net/tap-linux.h"
43 
44 #include "hw/vhost_net.h"
45 
46 /* Maximum GSO packet size (64k) plus plenty of room for
47  * the ethernet and virtio_net headers
48  */
49 #define TAP_BUFSIZE (4096 + 65536)
50 
/*
 * State for one tap-backed network client.  The generic VLANClientState
 * is embedded as the `nc` member; the functions in this file recover the
 * enclosing TAPState from a client pointer with DO_UPCAST(TAPState, nc, nc).
 */
51 typedef struct TAPState {
52     VLANClientState nc;
53     int fd; /* tap file descriptor; tap_cleanup() closes it and stores -1 */
54     char down_script[1024]; /* script launched by tap_cleanup(); empty string means none */
55     char down_script_arg[128]; /* argument handed to down_script (the interface name) */
56     uint8_t buf[TAP_BUFSIZE]; /* buffer that tap_send() reads packets into */
57     unsigned int read_poll : 1; /* when set, read handlers are installed on fd */
58     unsigned int write_poll : 1; /* when set, the write handler is installed on fd */
59     unsigned int using_vnet_hdr : 1; /* set through tap_using_vnet_hdr() */
60     unsigned int has_ufo: 1; /* result of tap_probe_has_ufo() at init time */
61     VHostNetState *vhost_net; /* NULL unless net_init_tap() set up vhost */
62     unsigned host_vnet_hdr_len; /* vnet header length configured for fd; 0 when absent */
63 } TAPState;
64 
/*
 * Forward declarations for helpers that are defined further down in this
 * file but referenced before their definitions.
 */
65 static int launch_script(const char *setup_script, const char *ifname, int fd);
66 
/* fd handler callbacks installed by tap_update_fd_handler() */
67 static int tap_can_send(void *opaque);
68 static void tap_send(void *opaque);
69 static void tap_writable(void *opaque);
70 
71 static void tap_update_fd_handler(TAPState *s)
72 {
73     qemu_set_fd_handler2(s->fd,
74                          s->read_poll  ? tap_can_send : NULL,
75                          s->read_poll  ? tap_send     : NULL,
76                          s->write_poll ? tap_writable : NULL,
77                          s);
78 }
79 
80 static void tap_read_poll(TAPState *s, int enable)
81 {
82     s->read_poll = !!enable;
83     tap_update_fd_handler(s);
84 }
85 
86 static void tap_write_poll(TAPState *s, int enable)
87 {
88     s->write_poll = !!enable;
89     tap_update_fd_handler(s);
90 }
91 
92 static void tap_writable(void *opaque)
93 {
94     TAPState *s = opaque;
95 
96     tap_write_poll(s, 0);
97 
98     qemu_flush_queued_packets(&s->nc);
99 }
100 
101 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
102 {
103     ssize_t len;
104 
105     do {
106         len = writev(s->fd, iov, iovcnt);
107     } while (len == -1 && errno == EINTR);
108 
109     if (len == -1 && errno == EAGAIN) {
110         tap_write_poll(s, 1);
111         return 0;
112     }
113 
114     return len;
115 }
116 
117 static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
118                                int iovcnt)
119 {
120     TAPState *s = DO_UPCAST(TAPState, nc, nc);
121     const struct iovec *iovp = iov;
122     struct iovec iov_copy[iovcnt + 1];
123     struct virtio_net_hdr_mrg_rxbuf hdr = { };
124 
125     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
126         iov_copy[0].iov_base = &hdr;
127         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
128         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
129         iovp = iov_copy;
130         iovcnt++;
131     }
132 
133     return tap_write_packet(s, iovp, iovcnt);
134 }
135 
136 static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
137 {
138     TAPState *s = DO_UPCAST(TAPState, nc, nc);
139     struct iovec iov[2];
140     int iovcnt = 0;
141     struct virtio_net_hdr_mrg_rxbuf hdr = { };
142 
143     if (s->host_vnet_hdr_len) {
144         iov[iovcnt].iov_base = &hdr;
145         iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
146         iovcnt++;
147     }
148 
149     iov[iovcnt].iov_base = (char *)buf;
150     iov[iovcnt].iov_len  = size;
151     iovcnt++;
152 
153     return tap_write_packet(s, iov, iovcnt);
154 }
155 
156 static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
157 {
158     TAPState *s = DO_UPCAST(TAPState, nc, nc);
159     struct iovec iov[1];
160 
161     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
162         return tap_receive_raw(nc, buf, size);
163     }
164 
165     iov[0].iov_base = (char *)buf;
166     iov[0].iov_len  = size;
167 
168     return tap_write_packet(s, iov, 1);
169 }
170 
171 static int tap_can_send(void *opaque)
172 {
173     TAPState *s = opaque;
174 
175     return qemu_can_send_packet(&s->nc);
176 }
177 
#ifndef __sun__
/*
 * Read up to maxlen bytes from tapfd into buf with a single read() call
 * and return its result unchanged.  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
184 
185 static void tap_send_completed(VLANClientState *nc, ssize_t len)
186 {
187     TAPState *s = DO_UPCAST(TAPState, nc, nc);
188     tap_read_poll(s, 1);
189 }
190 
191 static void tap_send(void *opaque)
192 {
193     TAPState *s = opaque;
194     int size;
195 
196     do {
197         uint8_t *buf = s->buf;
198 
199         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
200         if (size <= 0) {
201             break;
202         }
203 
204         if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
205             buf  += s->host_vnet_hdr_len;
206             size -= s->host_vnet_hdr_len;
207         }
208 
209         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
210         if (size == 0) {
211             tap_read_poll(s, 0);
212         }
213     } while (size > 0 && qemu_can_send_packet(&s->nc));
214 }
215 
216 int tap_has_ufo(VLANClientState *nc)
217 {
218     TAPState *s = DO_UPCAST(TAPState, nc, nc);
219 
220     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
221 
222     return s->has_ufo;
223 }
224 
225 int tap_has_vnet_hdr(VLANClientState *nc)
226 {
227     TAPState *s = DO_UPCAST(TAPState, nc, nc);
228 
229     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
230 
231     return !!s->host_vnet_hdr_len;
232 }
233 
234 int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
235 {
236     TAPState *s = DO_UPCAST(TAPState, nc, nc);
237 
238     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
239 
240     return tap_probe_vnet_hdr_len(s->fd, len);
241 }
242 
243 void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
244 {
245     TAPState *s = DO_UPCAST(TAPState, nc, nc);
246 
247     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
248     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
249            len == sizeof(struct virtio_net_hdr));
250 
251     tap_fd_set_vnet_hdr_len(s->fd, len);
252     s->host_vnet_hdr_len = len;
253 }
254 
255 void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
256 {
257     TAPState *s = DO_UPCAST(TAPState, nc, nc);
258 
259     using_vnet_hdr = using_vnet_hdr != 0;
260 
261     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
262     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
263 
264     s->using_vnet_hdr = using_vnet_hdr;
265 }
266 
267 void tap_set_offload(VLANClientState *nc, int csum, int tso4,
268                      int tso6, int ecn, int ufo)
269 {
270     TAPState *s = DO_UPCAST(TAPState, nc, nc);
271     if (s->fd < 0) {
272         return;
273     }
274 
275     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
276 }
277 
278 static void tap_cleanup(VLANClientState *nc)
279 {
280     TAPState *s = DO_UPCAST(TAPState, nc, nc);
281 
282     if (s->vhost_net) {
283         vhost_net_cleanup(s->vhost_net);
284         s->vhost_net = NULL;
285     }
286 
287     qemu_purge_queued_packets(nc);
288 
289     if (s->down_script[0])
290         launch_script(s->down_script, s->down_script_arg, s->fd);
291 
292     tap_read_poll(s, 0);
293     tap_write_poll(s, 0);
294     close(s->fd);
295     s->fd = -1;
296 }
297 
298 static void tap_poll(VLANClientState *nc, bool enable)
299 {
300     TAPState *s = DO_UPCAST(TAPState, nc, nc);
301     tap_read_poll(s, enable);
302     tap_write_poll(s, enable);
303 }
304 
305 int tap_get_fd(VLANClientState *nc)
306 {
307     TAPState *s = DO_UPCAST(TAPState, nc, nc);
308     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
309     return s->fd;
310 }
311 
312 /* fd support */
313 
/*
 * Client description passed to qemu_new_net_client() by net_tap_fd_init().
 * .size is sizeof(TAPState), and the callbacks are the tap_* functions
 * defined earlier in this file.
 */
314 static NetClientInfo net_tap_info = {
315     .type = NET_CLIENT_TYPE_TAP,
316     .size = sizeof(TAPState),
317     .receive = tap_receive,
318     .receive_raw = tap_receive_raw,
319     .receive_iov = tap_receive_iov,
320     .poll = tap_poll,
321     .cleanup = tap_cleanup,
322 };
323 
324 static TAPState *net_tap_fd_init(VLANState *vlan,
325                                  const char *model,
326                                  const char *name,
327                                  int fd,
328                                  int vnet_hdr)
329 {
330     VLANClientState *nc;
331     TAPState *s;
332 
333     nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
334 
335     s = DO_UPCAST(TAPState, nc, nc);
336 
337     s->fd = fd;
338     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
339     s->using_vnet_hdr = 0;
340     s->has_ufo = tap_probe_has_ufo(s->fd);
341     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
342     tap_read_poll(s, 1);
343     s->vhost_net = NULL;
344     return s;
345 }
346 
347 static int launch_script(const char *setup_script, const char *ifname, int fd)
348 {
349     sigset_t oldmask, mask;
350     int pid, status;
351     char *args[3];
352     char **parg;
353 
354     sigemptyset(&mask);
355     sigaddset(&mask, SIGCHLD);
356     sigprocmask(SIG_BLOCK, &mask, &oldmask);
357 
358     /* try to launch network script */
359     pid = fork();
360     if (pid == 0) {
361         int open_max = sysconf(_SC_OPEN_MAX), i;
362 
363         for (i = 0; i < open_max; i++) {
364             if (i != STDIN_FILENO &&
365                 i != STDOUT_FILENO &&
366                 i != STDERR_FILENO &&
367                 i != fd) {
368                 close(i);
369             }
370         }
371         parg = args;
372         *parg++ = (char *)setup_script;
373         *parg++ = (char *)ifname;
374         *parg = NULL;
375         execv(setup_script, args);
376         _exit(1);
377     } else if (pid > 0) {
378         while (waitpid(pid, &status, 0) != pid) {
379             /* loop */
380         }
381         sigprocmask(SIG_SETMASK, &oldmask, NULL);
382 
383         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
384             return 0;
385         }
386     }
387     fprintf(stderr, "%s: could not launch network script\n", setup_script);
388     return -1;
389 }
390 
391 static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
392 {
393     int fd, vnet_hdr_required;
394     char ifname[128] = {0,};
395     const char *setup_script;
396 
397     if (qemu_opt_get(opts, "ifname")) {
398         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
399     }
400 
401     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
402     if (qemu_opt_get(opts, "vnet_hdr")) {
403         vnet_hdr_required = *vnet_hdr;
404     } else {
405         vnet_hdr_required = 0;
406     }
407 
408     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
409     if (fd < 0) {
410         return -1;
411     }
412 
413     setup_script = qemu_opt_get(opts, "script");
414     if (setup_script &&
415         setup_script[0] != '\0' &&
416         strcmp(setup_script, "no") != 0 &&
417         launch_script(setup_script, ifname, fd)) {
418         close(fd);
419         return -1;
420     }
421 
422     qemu_opt_set(opts, "ifname", ifname);
423 
424     return fd;
425 }
426 
427 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
428 {
429     TAPState *s;
430     int fd, vnet_hdr = 0;
431 
432     if (qemu_opt_get(opts, "fd")) {
433         if (qemu_opt_get(opts, "ifname") ||
434             qemu_opt_get(opts, "script") ||
435             qemu_opt_get(opts, "downscript") ||
436             qemu_opt_get(opts, "vnet_hdr")) {
437             error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
438             return -1;
439         }
440 
441         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
442         if (fd == -1) {
443             return -1;
444         }
445 
446         fcntl(fd, F_SETFL, O_NONBLOCK);
447 
448         vnet_hdr = tap_probe_vnet_hdr(fd);
449     } else {
450         if (!qemu_opt_get(opts, "script")) {
451             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
452         }
453 
454         if (!qemu_opt_get(opts, "downscript")) {
455             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
456         }
457 
458         fd = net_tap_init(opts, &vnet_hdr);
459         if (fd == -1) {
460             return -1;
461         }
462     }
463 
464     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
465     if (!s) {
466         close(fd);
467         return -1;
468     }
469 
470     if (tap_set_sndbuf(s->fd, opts) < 0) {
471         return -1;
472     }
473 
474     if (qemu_opt_get(opts, "fd")) {
475         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
476     } else {
477         const char *ifname, *script, *downscript;
478 
479         ifname     = qemu_opt_get(opts, "ifname");
480         script     = qemu_opt_get(opts, "script");
481         downscript = qemu_opt_get(opts, "downscript");
482 
483         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
484                  "ifname=%s,script=%s,downscript=%s",
485                  ifname, script, downscript);
486 
487         if (strcmp(downscript, "no") != 0) {
488             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
489             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
490         }
491     }
492 
493     if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
494                           qemu_opt_get_bool(opts, "vhostforce", false))) {
495         int vhostfd, r;
496         bool force = qemu_opt_get_bool(opts, "vhostforce", false);
497         if (qemu_opt_get(opts, "vhostfd")) {
498             r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
499             if (r == -1) {
500                 return -1;
501             }
502             vhostfd = r;
503         } else {
504             vhostfd = -1;
505         }
506         s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
507         if (!s->vhost_net) {
508             error_report("vhost-net requested but could not be initialized");
509             return -1;
510         }
511     } else if (qemu_opt_get(opts, "vhostfd")) {
512         error_report("vhostfd= is not valid without vhost");
513         return -1;
514     }
515 
516     return 0;
517 }
518 
519 VHostNetState *tap_get_vhost_net(VLANClientState *nc)
520 {
521     TAPState *s = DO_UPCAST(TAPState, nc, nc);
522     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
523     return s->vhost_net;
524 }
525