xref: /openbmc/qemu/net/tap.c (revision 1529ae1b)
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  * Copyright (c) 2009 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "net/tap.h"
27 
28 #include "config-host.h"
29 
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33 #include <sys/socket.h>
34 #include <net/if.h>
35 
36 #include "net.h"
37 #include "sysemu.h"
38 #include "qemu-char.h"
39 #include "qemu-common.h"
40 #include "qemu-error.h"
41 
42 #include "net/tap-linux.h"
43 
44 #include "hw/vhost_net.h"
45 
46 /* Maximum GSO packet size (64k) plus plenty of room for
47  * the ethernet and virtio_net headers
48  */
49 #define TAP_BUFSIZE (4096 + 65536)
50 
51 typedef struct TAPState {
52     VLANClientState nc;
53     int fd;
54     char down_script[1024];
55     char down_script_arg[128];
56     uint8_t buf[TAP_BUFSIZE];
57     unsigned int read_poll : 1;
58     unsigned int write_poll : 1;
59     unsigned int using_vnet_hdr : 1;
60     unsigned int has_ufo: 1;
61     VHostNetState *vhost_net;
62     unsigned host_vnet_hdr_len;
63 } TAPState;
64 
65 static int launch_script(const char *setup_script, const char *ifname, int fd);
66 
67 static int tap_can_send(void *opaque);
68 static void tap_send(void *opaque);
69 static void tap_writable(void *opaque);
70 
71 static void tap_update_fd_handler(TAPState *s)
72 {
73     qemu_set_fd_handler2(s->fd,
74                          s->read_poll  ? tap_can_send : NULL,
75                          s->read_poll  ? tap_send     : NULL,
76                          s->write_poll ? tap_writable : NULL,
77                          s);
78 }
79 
80 static void tap_read_poll(TAPState *s, int enable)
81 {
82     s->read_poll = !!enable;
83     tap_update_fd_handler(s);
84 }
85 
86 static void tap_write_poll(TAPState *s, int enable)
87 {
88     s->write_poll = !!enable;
89     tap_update_fd_handler(s);
90 }
91 
92 static void tap_writable(void *opaque)
93 {
94     TAPState *s = opaque;
95 
96     tap_write_poll(s, 0);
97 
98     qemu_flush_queued_packets(&s->nc);
99 }
100 
101 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
102 {
103     ssize_t len;
104 
105     do {
106         len = writev(s->fd, iov, iovcnt);
107     } while (len == -1 && errno == EINTR);
108 
109     if (len == -1 && errno == EAGAIN) {
110         tap_write_poll(s, 1);
111         return 0;
112     }
113 
114     return len;
115 }
116 
117 static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
118                                int iovcnt)
119 {
120     TAPState *s = DO_UPCAST(TAPState, nc, nc);
121     const struct iovec *iovp = iov;
122     struct iovec iov_copy[iovcnt + 1];
123     struct virtio_net_hdr_mrg_rxbuf hdr = { };
124 
125     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
126         iov_copy[0].iov_base = &hdr;
127         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
128         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
129         iovp = iov_copy;
130         iovcnt++;
131     }
132 
133     return tap_write_packet(s, iovp, iovcnt);
134 }
135 
136 static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
137 {
138     TAPState *s = DO_UPCAST(TAPState, nc, nc);
139     struct iovec iov[2];
140     int iovcnt = 0;
141     struct virtio_net_hdr_mrg_rxbuf hdr = { };
142 
143     if (s->host_vnet_hdr_len) {
144         iov[iovcnt].iov_base = &hdr;
145         iov[iovcnt].iov_len  = s->host_vnet_hdr_len;
146         iovcnt++;
147     }
148 
149     iov[iovcnt].iov_base = (char *)buf;
150     iov[iovcnt].iov_len  = size;
151     iovcnt++;
152 
153     return tap_write_packet(s, iov, iovcnt);
154 }
155 
156 static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
157 {
158     TAPState *s = DO_UPCAST(TAPState, nc, nc);
159     struct iovec iov[1];
160 
161     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
162         return tap_receive_raw(nc, buf, size);
163     }
164 
165     iov[0].iov_base = (char *)buf;
166     iov[0].iov_len  = size;
167 
168     return tap_write_packet(s, iov, 1);
169 }
170 
171 static int tap_can_send(void *opaque)
172 {
173     TAPState *s = opaque;
174 
175     return qemu_can_send_packet(&s->nc);
176 }
177 
#ifndef __sun__
/*
 * Read one packet from tapfd into buf (at most maxlen bytes).
 * Returns the read() result unchanged: the byte count, 0, or -1.
 * Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
184 
185 static void tap_send_completed(VLANClientState *nc, ssize_t len)
186 {
187     TAPState *s = DO_UPCAST(TAPState, nc, nc);
188     tap_read_poll(s, 1);
189 }
190 
191 static void tap_send(void *opaque)
192 {
193     TAPState *s = opaque;
194     int size;
195 
196     do {
197         uint8_t *buf = s->buf;
198 
199         size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
200         if (size <= 0) {
201             break;
202         }
203 
204         if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
205             buf  += s->host_vnet_hdr_len;
206             size -= s->host_vnet_hdr_len;
207         }
208 
209         size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
210         if (size == 0) {
211             tap_read_poll(s, 0);
212         }
213     } while (size > 0 && qemu_can_send_packet(&s->nc));
214 }
215 
216 int tap_has_ufo(VLANClientState *nc)
217 {
218     TAPState *s = DO_UPCAST(TAPState, nc, nc);
219 
220     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
221 
222     return s->has_ufo;
223 }
224 
225 int tap_has_vnet_hdr(VLANClientState *nc)
226 {
227     TAPState *s = DO_UPCAST(TAPState, nc, nc);
228 
229     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
230 
231     return !!s->host_vnet_hdr_len;
232 }
233 
234 int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
235 {
236     TAPState *s = DO_UPCAST(TAPState, nc, nc);
237 
238     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
239 
240     return tap_probe_vnet_hdr_len(s->fd, len);
241 }
242 
243 void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
244 {
245     TAPState *s = DO_UPCAST(TAPState, nc, nc);
246 
247     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
248     assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
249            len == sizeof(struct virtio_net_hdr));
250 
251     tap_fd_set_vnet_hdr_len(s->fd, len);
252     s->host_vnet_hdr_len = len;
253 }
254 
255 void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
256 {
257     TAPState *s = DO_UPCAST(TAPState, nc, nc);
258 
259     using_vnet_hdr = using_vnet_hdr != 0;
260 
261     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
262     assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
263 
264     s->using_vnet_hdr = using_vnet_hdr;
265 }
266 
267 void tap_set_offload(VLANClientState *nc, int csum, int tso4,
268                      int tso6, int ecn, int ufo)
269 {
270     TAPState *s = DO_UPCAST(TAPState, nc, nc);
271     if (s->fd < 0) {
272         return;
273     }
274 
275     tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
276 }
277 
278 static void tap_cleanup(VLANClientState *nc)
279 {
280     TAPState *s = DO_UPCAST(TAPState, nc, nc);
281 
282     if (s->vhost_net) {
283         vhost_net_cleanup(s->vhost_net);
284         s->vhost_net = NULL;
285     }
286 
287     qemu_purge_queued_packets(nc);
288 
289     if (s->down_script[0])
290         launch_script(s->down_script, s->down_script_arg, s->fd);
291 
292     tap_read_poll(s, 0);
293     tap_write_poll(s, 0);
294     close(s->fd);
295     s->fd = -1;
296 }
297 
298 static void tap_poll(VLANClientState *nc, bool enable)
299 {
300     TAPState *s = DO_UPCAST(TAPState, nc, nc);
301     tap_read_poll(s, enable);
302     tap_write_poll(s, enable);
303 }
304 
305 int tap_get_fd(VLANClientState *nc)
306 {
307     TAPState *s = DO_UPCAST(TAPState, nc, nc);
308     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
309     return s->fd;
310 }
311 
312 /* fd support */
313 
314 static NetClientInfo net_tap_info = {
315     .type = NET_CLIENT_TYPE_TAP,
316     .size = sizeof(TAPState),
317     .receive = tap_receive,
318     .receive_raw = tap_receive_raw,
319     .receive_iov = tap_receive_iov,
320     .poll = tap_poll,
321     .cleanup = tap_cleanup,
322 };
323 
324 static TAPState *net_tap_fd_init(VLANState *vlan,
325                                  const char *model,
326                                  const char *name,
327                                  int fd,
328                                  int vnet_hdr)
329 {
330     VLANClientState *nc;
331     TAPState *s;
332 
333     nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
334 
335     s = DO_UPCAST(TAPState, nc, nc);
336 
337     s->fd = fd;
338     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
339     s->using_vnet_hdr = 0;
340     s->has_ufo = tap_probe_has_ufo(s->fd);
341     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
342     tap_read_poll(s, 1);
343     s->vhost_net = NULL;
344     return s;
345 }
346 
/*
 * Run a network setup/teardown script and wait for it to finish.
 *
 * The script is executed in a forked child as
 *     setup_script ifname
 * after the child has closed every descriptor except stdin, stdout,
 * stderr and fd.
 *
 * Returns 0 if the script exited normally with status 0.  Returns -1,
 * after printing a message to stderr, if the fork failed, the script could
 * not be executed, it exited with a non-zero status or was killed, or its
 * exit status could not be collected.
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    pid_t pid;
    int status;

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        char *args[3];
        long open_max = sysconf(_SC_OPEN_MAX);
        long i;

        /* If sysconf() fails (-1) the loop simply closes nothing. */
        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close((int)i);
            }
        }

        args[0] = (char *)setup_script;
        args[1] = (char *)ifname;
        args[2] = NULL;
        execv(setup_script, args);
        /* Only reached when execv() failed. */
        _exit(1);
    } else if (pid > 0) {
        pid_t waited;

        /*
         * Retry only when interrupted by a signal.  Any other failure
         * (e.g. ECHILD because SIGCHLD is ignored) is permanent, and
         * retrying it would spin forever.
         */
        do {
            waited = waitpid(pid, &status, 0);
        } while (waited == -1 && errno == EINTR);

        if (waited == pid && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
            return 0;
        }
    }
    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
384 
385 static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
386 {
387     int fd, vnet_hdr_required;
388     char ifname[128] = {0,};
389     const char *setup_script;
390 
391     if (qemu_opt_get(opts, "ifname")) {
392         pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
393     }
394 
395     *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
396     if (qemu_opt_get(opts, "vnet_hdr")) {
397         vnet_hdr_required = *vnet_hdr;
398     } else {
399         vnet_hdr_required = 0;
400     }
401 
402     TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
403     if (fd < 0) {
404         return -1;
405     }
406 
407     setup_script = qemu_opt_get(opts, "script");
408     if (setup_script &&
409         setup_script[0] != '\0' &&
410         strcmp(setup_script, "no") != 0 &&
411         launch_script(setup_script, ifname, fd)) {
412         close(fd);
413         return -1;
414     }
415 
416     qemu_opt_set(opts, "ifname", ifname);
417 
418     return fd;
419 }
420 
421 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
422 {
423     TAPState *s;
424     int fd, vnet_hdr = 0;
425 
426     if (qemu_opt_get(opts, "fd")) {
427         if (qemu_opt_get(opts, "ifname") ||
428             qemu_opt_get(opts, "script") ||
429             qemu_opt_get(opts, "downscript") ||
430             qemu_opt_get(opts, "vnet_hdr")) {
431             error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
432             return -1;
433         }
434 
435         fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
436         if (fd == -1) {
437             return -1;
438         }
439 
440         fcntl(fd, F_SETFL, O_NONBLOCK);
441 
442         vnet_hdr = tap_probe_vnet_hdr(fd);
443     } else {
444         if (!qemu_opt_get(opts, "script")) {
445             qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
446         }
447 
448         if (!qemu_opt_get(opts, "downscript")) {
449             qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
450         }
451 
452         fd = net_tap_init(opts, &vnet_hdr);
453         if (fd == -1) {
454             return -1;
455         }
456     }
457 
458     s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
459     if (!s) {
460         close(fd);
461         return -1;
462     }
463 
464     if (tap_set_sndbuf(s->fd, opts) < 0) {
465         return -1;
466     }
467 
468     if (qemu_opt_get(opts, "fd")) {
469         snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
470     } else {
471         const char *ifname, *script, *downscript;
472 
473         ifname     = qemu_opt_get(opts, "ifname");
474         script     = qemu_opt_get(opts, "script");
475         downscript = qemu_opt_get(opts, "downscript");
476 
477         snprintf(s->nc.info_str, sizeof(s->nc.info_str),
478                  "ifname=%s,script=%s,downscript=%s",
479                  ifname, script, downscript);
480 
481         if (strcmp(downscript, "no") != 0) {
482             snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
483             snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
484         }
485     }
486 
487     if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
488                           qemu_opt_get_bool(opts, "vhostforce", false))) {
489         int vhostfd, r;
490         bool force = qemu_opt_get_bool(opts, "vhostforce", false);
491         if (qemu_opt_get(opts, "vhostfd")) {
492             r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
493             if (r == -1) {
494                 return -1;
495             }
496             vhostfd = r;
497         } else {
498             vhostfd = -1;
499         }
500         s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
501         if (!s->vhost_net) {
502             error_report("vhost-net requested but could not be initialized");
503             return -1;
504         }
505     } else if (qemu_opt_get(opts, "vhostfd")) {
506         error_report("vhostfd= is not valid without vhost");
507         return -1;
508     }
509 
510     return 0;
511 }
512 
513 VHostNetState *tap_get_vhost_net(VLANClientState *nc)
514 {
515     TAPState *s = DO_UPCAST(TAPState, nc, nc);
516     assert(nc->info->type == NET_CLIENT_TYPE_TAP);
517     return s->vhost_net;
518 }
519