1 /* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * Copyright (c) 2009 Red Hat, Inc. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "tap.h" 27 28 #include "config-host.h" 29 30 #include <sys/ioctl.h> 31 #include <sys/stat.h> 32 #include <sys/wait.h> 33 #include <sys/socket.h> 34 #include <net/if.h> 35 36 #include "net.h" 37 #include "clients.h" 38 #include "monitor.h" 39 #include "sysemu.h" 40 #include "qemu-char.h" 41 #include "qemu-common.h" 42 #include "qemu-error.h" 43 44 #include "net/tap-linux.h" 45 46 #include "hw/vhost_net.h" 47 48 /* Maximum GSO packet size (64k) plus plenty of room for 49 * the ethernet and virtio_net headers 50 */ 51 #define TAP_BUFSIZE (4096 + 65536) 52 53 typedef struct TAPState { 54 NetClientState nc; 55 int fd; 56 char down_script[1024]; 57 char down_script_arg[128]; 58 uint8_t buf[TAP_BUFSIZE]; 59 unsigned int read_poll : 1; 60 unsigned int write_poll : 1; 61 unsigned int using_vnet_hdr : 1; 62 unsigned int has_ufo: 1; 63 VHostNetState *vhost_net; 64 unsigned host_vnet_hdr_len; 65 } TAPState; 66 67 static int launch_script(const char *setup_script, const char *ifname, int fd); 68 69 static int tap_can_send(void *opaque); 70 static void tap_send(void *opaque); 71 static void tap_writable(void *opaque); 72 73 static void tap_update_fd_handler(TAPState *s) 74 { 75 qemu_set_fd_handler2(s->fd, 76 s->read_poll ? tap_can_send : NULL, 77 s->read_poll ? tap_send : NULL, 78 s->write_poll ? tap_writable : NULL, 79 s); 80 } 81 82 static void tap_read_poll(TAPState *s, int enable) 83 { 84 s->read_poll = !!enable; 85 tap_update_fd_handler(s); 86 } 87 88 static void tap_write_poll(TAPState *s, int enable) 89 { 90 s->write_poll = !!enable; 91 tap_update_fd_handler(s); 92 } 93 94 static void tap_writable(void *opaque) 95 { 96 TAPState *s = opaque; 97 98 tap_write_poll(s, 0); 99 100 qemu_flush_queued_packets(&s->nc); 101 } 102 103 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt) 104 { 105 ssize_t len; 106 107 do { 108 len = writev(s->fd, iov, iovcnt); 109 } while (len == -1 && errno == EINTR); 110 111 if (len == -1 && errno == EAGAIN) { 112 tap_write_poll(s, 1); 113 return 0; 114 } 115 116 return len; 117 } 118 119 static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov, 120 int iovcnt) 121 { 122 TAPState *s = DO_UPCAST(TAPState, nc, nc); 123 const struct iovec *iovp = iov; 124 struct iovec iov_copy[iovcnt + 1]; 125 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 126 127 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 128 iov_copy[0].iov_base = &hdr; 129 iov_copy[0].iov_len = s->host_vnet_hdr_len; 130 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); 131 iovp = iov_copy; 132 iovcnt++; 133 } 134 135 return tap_write_packet(s, iovp, iovcnt); 136 } 137 138 static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size) 139 { 140 TAPState *s = DO_UPCAST(TAPState, nc, nc); 141 struct iovec iov[2]; 142 int iovcnt = 0; 143 struct virtio_net_hdr_mrg_rxbuf hdr = { }; 144 145 if (s->host_vnet_hdr_len) { 146 iov[iovcnt].iov_base = &hdr; 147 iov[iovcnt].iov_len = s->host_vnet_hdr_len; 148 iovcnt++; 149 } 150 151 iov[iovcnt].iov_base = (char *)buf; 152 iov[iovcnt].iov_len = size; 153 iovcnt++; 154 155 return tap_write_packet(s, iov, iovcnt); 156 } 157 158 static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size) 159 { 160 TAPState *s = DO_UPCAST(TAPState, nc, nc); 161 struct iovec iov[1]; 162 163 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 164 return tap_receive_raw(nc, buf, size); 165 } 166 167 iov[0].iov_base = (char *)buf; 168 iov[0].iov_len = size; 169 170 return tap_write_packet(s, iov, 1); 171 } 172 173 static int tap_can_send(void *opaque) 174 { 175 TAPState *s = opaque; 176 177 return qemu_can_send_packet(&s->nc); 178 } 179 180 #ifndef __sun__ 181 ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen) 182 { 183 return read(tapfd, buf, maxlen); 184 } 185 #endif 186 187 static void tap_send_completed(NetClientState *nc, ssize_t len) 188 { 189 TAPState *s = DO_UPCAST(TAPState, nc, nc); 190 tap_read_poll(s, 1); 191 } 192 193 static void tap_send(void *opaque) 194 { 195 TAPState *s = opaque; 196 int size; 197 198 do { 199 uint8_t *buf = s->buf; 200 201 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf)); 202 if (size <= 0) { 203 break; 204 } 205 206 if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { 207 buf += s->host_vnet_hdr_len; 208 size -= s->host_vnet_hdr_len; 209 } 210 211 size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); 212 if (size == 0) { 213 tap_read_poll(s, 0); 214 } 215 } while (size > 0 && qemu_can_send_packet(&s->nc)); 216 } 217 218 int tap_has_ufo(NetClientState *nc) 219 { 220 TAPState *s = DO_UPCAST(TAPState, nc, nc); 221 222 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 223 224 return s->has_ufo; 225 } 226 227 int tap_has_vnet_hdr(NetClientState *nc) 228 { 229 TAPState *s = DO_UPCAST(TAPState, nc, nc); 230 231 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 232 233 return !!s->host_vnet_hdr_len; 234 } 235 236 int tap_has_vnet_hdr_len(NetClientState *nc, int len) 237 { 238 TAPState *s = DO_UPCAST(TAPState, nc, nc); 239 240 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 241 242 return tap_probe_vnet_hdr_len(s->fd, len); 243 } 244 245 void tap_set_vnet_hdr_len(NetClientState *nc, int len) 246 { 247 TAPState *s = DO_UPCAST(TAPState, nc, nc); 248 249 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 250 assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || 251 len == sizeof(struct virtio_net_hdr)); 252 253 tap_fd_set_vnet_hdr_len(s->fd, len); 254 s->host_vnet_hdr_len = len; 255 } 256 257 void tap_using_vnet_hdr(NetClientState *nc, int using_vnet_hdr) 258 { 259 TAPState *s = DO_UPCAST(TAPState, nc, nc); 260 261 using_vnet_hdr = using_vnet_hdr != 0; 262 263 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 264 assert(!!s->host_vnet_hdr_len == using_vnet_hdr); 265 266 s->using_vnet_hdr = using_vnet_hdr; 267 } 268 269 void tap_set_offload(NetClientState *nc, int csum, int tso4, 270 int tso6, int ecn, int ufo) 271 { 272 TAPState *s = DO_UPCAST(TAPState, nc, nc); 273 if (s->fd < 0) { 274 return; 275 } 276 277 tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo); 278 } 279 280 static void tap_cleanup(NetClientState *nc) 281 { 282 TAPState *s = DO_UPCAST(TAPState, nc, nc); 283 284 if (s->vhost_net) { 285 vhost_net_cleanup(s->vhost_net); 286 s->vhost_net = NULL; 287 } 288 289 qemu_purge_queued_packets(nc); 290 291 if (s->down_script[0]) 292 launch_script(s->down_script, s->down_script_arg, s->fd); 293 294 tap_read_poll(s, 0); 295 tap_write_poll(s, 0); 296 close(s->fd); 297 s->fd = -1; 298 } 299 300 static void tap_poll(NetClientState *nc, bool enable) 301 { 302 TAPState *s = DO_UPCAST(TAPState, nc, nc); 303 tap_read_poll(s, enable); 304 tap_write_poll(s, enable); 305 } 306 307 int tap_get_fd(NetClientState *nc) 308 { 309 TAPState *s = DO_UPCAST(TAPState, nc, nc); 310 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 311 return s->fd; 312 } 313 314 /* fd support */ 315 316 static NetClientInfo net_tap_info = { 317 .type = NET_CLIENT_OPTIONS_KIND_TAP, 318 .size = sizeof(TAPState), 319 .receive = tap_receive, 320 .receive_raw = tap_receive_raw, 321 .receive_iov = tap_receive_iov, 322 .poll = tap_poll, 323 .cleanup = tap_cleanup, 324 }; 325 326 static TAPState *net_tap_fd_init(NetClientState *peer, 327 const char *model, 328 const char *name, 329 int fd, 330 int vnet_hdr) 331 { 332 NetClientState *nc; 333 TAPState *s; 334 335 nc = qemu_new_net_client(&net_tap_info, peer, model, name); 336 337 s = DO_UPCAST(TAPState, nc, nc); 338 339 s->fd = fd; 340 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; 341 s->using_vnet_hdr = 0; 342 s->has_ufo = tap_probe_has_ufo(s->fd); 343 tap_set_offload(&s->nc, 0, 0, 0, 0, 0); 344 /* 345 * Make sure host header length is set correctly in tap: 346 * it might have been modified by another instance of qemu. 347 */ 348 if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) { 349 tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len); 350 } 351 tap_read_poll(s, 1); 352 s->vhost_net = NULL; 353 return s; 354 } 355 356 static int launch_script(const char *setup_script, const char *ifname, int fd) 357 { 358 int pid, status; 359 char *args[3]; 360 char **parg; 361 362 /* try to launch network script */ 363 pid = fork(); 364 if (pid == 0) { 365 int open_max = sysconf(_SC_OPEN_MAX), i; 366 367 for (i = 0; i < open_max; i++) { 368 if (i != STDIN_FILENO && 369 i != STDOUT_FILENO && 370 i != STDERR_FILENO && 371 i != fd) { 372 close(i); 373 } 374 } 375 parg = args; 376 *parg++ = (char *)setup_script; 377 *parg++ = (char *)ifname; 378 *parg = NULL; 379 execv(setup_script, args); 380 _exit(1); 381 } else if (pid > 0) { 382 while (waitpid(pid, &status, 0) != pid) { 383 /* loop */ 384 } 385 386 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 387 return 0; 388 } 389 } 390 fprintf(stderr, "%s: could not launch network script\n", setup_script); 391 return -1; 392 } 393 394 static int recv_fd(int c) 395 { 396 int fd; 397 uint8_t msgbuf[CMSG_SPACE(sizeof(fd))]; 398 struct msghdr msg = { 399 .msg_control = msgbuf, 400 .msg_controllen = sizeof(msgbuf), 401 }; 402 struct cmsghdr *cmsg; 403 struct iovec iov; 404 uint8_t req[1]; 405 ssize_t len; 406 407 cmsg = CMSG_FIRSTHDR(&msg); 408 cmsg->cmsg_level = SOL_SOCKET; 409 cmsg->cmsg_type = SCM_RIGHTS; 410 cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); 411 msg.msg_controllen = cmsg->cmsg_len; 412 413 iov.iov_base = req; 414 iov.iov_len = sizeof(req); 415 416 msg.msg_iov = &iov; 417 msg.msg_iovlen = 1; 418 419 len = recvmsg(c, &msg, 0); 420 if (len > 0) { 421 memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); 422 return fd; 423 } 424 425 return len; 426 } 427 428 static int net_bridge_run_helper(const char *helper, const char *bridge) 429 { 430 sigset_t oldmask, mask; 431 int pid, status; 432 char *args[5]; 433 char **parg; 434 int sv[2]; 435 436 sigemptyset(&mask); 437 sigaddset(&mask, SIGCHLD); 438 sigprocmask(SIG_BLOCK, &mask, &oldmask); 439 440 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { 441 return -1; 442 } 443 444 /* try to launch bridge helper */ 445 pid = fork(); 446 if (pid == 0) { 447 int open_max = sysconf(_SC_OPEN_MAX), i; 448 char fd_buf[6+10]; 449 char br_buf[6+IFNAMSIZ] = {0}; 450 char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15]; 451 452 for (i = 0; i < open_max; i++) { 453 if (i != STDIN_FILENO && 454 i != STDOUT_FILENO && 455 i != STDERR_FILENO && 456 i != sv[1]) { 457 close(i); 458 } 459 } 460 461 snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]); 462 463 if (strrchr(helper, ' ') || strrchr(helper, '\t')) { 464 /* assume helper is a command */ 465 466 if (strstr(helper, "--br=") == NULL) { 467 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); 468 } 469 470 snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s", 471 helper, "--use-vnet", fd_buf, br_buf); 472 473 parg = args; 474 *parg++ = (char *)"sh"; 475 *parg++ = (char *)"-c"; 476 *parg++ = helper_cmd; 477 *parg++ = NULL; 478 479 execv("/bin/sh", args); 480 } else { 481 /* assume helper is just the executable path name */ 482 483 snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge); 484 485 parg = args; 486 *parg++ = (char *)helper; 487 *parg++ = (char *)"--use-vnet"; 488 *parg++ = fd_buf; 489 *parg++ = br_buf; 490 *parg++ = NULL; 491 492 execv(helper, args); 493 } 494 _exit(1); 495 496 } else if (pid > 0) { 497 int fd; 498 499 close(sv[1]); 500 501 do { 502 fd = recv_fd(sv[0]); 503 } while (fd == -1 && errno == EINTR); 504 505 close(sv[0]); 506 507 while (waitpid(pid, &status, 0) != pid) { 508 /* loop */ 509 } 510 sigprocmask(SIG_SETMASK, &oldmask, NULL); 511 if (fd < 0) { 512 fprintf(stderr, "failed to recv file descriptor\n"); 513 return -1; 514 } 515 516 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 517 return fd; 518 } 519 } 520 fprintf(stderr, "failed to launch bridge helper\n"); 521 return -1; 522 } 523 524 int net_init_bridge(const NetClientOptions *opts, const char *name, 525 NetClientState *peer) 526 { 527 const NetdevBridgeOptions *bridge; 528 const char *helper, *br; 529 530 TAPState *s; 531 int fd, vnet_hdr; 532 533 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE); 534 bridge = opts->bridge; 535 536 helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; 537 br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; 538 539 fd = net_bridge_run_helper(helper, br); 540 if (fd == -1) { 541 return -1; 542 } 543 544 fcntl(fd, F_SETFL, O_NONBLOCK); 545 546 vnet_hdr = tap_probe_vnet_hdr(fd); 547 548 s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); 549 if (!s) { 550 close(fd); 551 return -1; 552 } 553 554 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, 555 br); 556 557 return 0; 558 } 559 560 static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr, 561 const char *setup_script, char *ifname, 562 size_t ifname_sz) 563 { 564 int fd, vnet_hdr_required; 565 566 if (tap->has_ifname) { 567 pstrcpy(ifname, ifname_sz, tap->ifname); 568 } else { 569 assert(ifname_sz > 0); 570 ifname[0] = '\0'; 571 } 572 573 if (tap->has_vnet_hdr) { 574 *vnet_hdr = tap->vnet_hdr; 575 vnet_hdr_required = *vnet_hdr; 576 } else { 577 *vnet_hdr = 1; 578 vnet_hdr_required = 0; 579 } 580 581 TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required)); 582 if (fd < 0) { 583 return -1; 584 } 585 586 if (setup_script && 587 setup_script[0] != '\0' && 588 strcmp(setup_script, "no") != 0 && 589 launch_script(setup_script, ifname, fd)) { 590 close(fd); 591 return -1; 592 } 593 594 return fd; 595 } 596 597 int net_init_tap(const NetClientOptions *opts, const char *name, 598 NetClientState *peer) 599 { 600 const NetdevTapOptions *tap; 601 602 int fd, vnet_hdr = 0; 603 const char *model; 604 TAPState *s; 605 606 /* for the no-fd, no-helper case */ 607 const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */ 608 char ifname[128]; 609 610 assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); 611 tap = opts->tap; 612 613 if (tap->has_fd) { 614 if (tap->has_ifname || tap->has_script || tap->has_downscript || 615 tap->has_vnet_hdr || tap->has_helper) { 616 error_report("ifname=, script=, downscript=, vnet_hdr=, " 617 "and helper= are invalid with fd="); 618 return -1; 619 } 620 621 fd = monitor_handle_fd_param(cur_mon, tap->fd); 622 if (fd == -1) { 623 return -1; 624 } 625 626 fcntl(fd, F_SETFL, O_NONBLOCK); 627 628 vnet_hdr = tap_probe_vnet_hdr(fd); 629 630 model = "tap"; 631 632 } else if (tap->has_helper) { 633 if (tap->has_ifname || tap->has_script || tap->has_downscript || 634 tap->has_vnet_hdr) { 635 error_report("ifname=, script=, downscript=, and vnet_hdr= " 636 "are invalid with helper="); 637 return -1; 638 } 639 640 fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE); 641 if (fd == -1) { 642 return -1; 643 } 644 645 fcntl(fd, F_SETFL, O_NONBLOCK); 646 647 vnet_hdr = tap_probe_vnet_hdr(fd); 648 649 model = "bridge"; 650 651 } else { 652 script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT; 653 fd = net_tap_init(tap, &vnet_hdr, script, ifname, sizeof ifname); 654 if (fd == -1) { 655 return -1; 656 } 657 658 model = "tap"; 659 } 660 661 s = net_tap_fd_init(peer, model, name, fd, vnet_hdr); 662 if (!s) { 663 close(fd); 664 return -1; 665 } 666 667 if (tap_set_sndbuf(s->fd, tap) < 0) { 668 return -1; 669 } 670 671 if (tap->has_fd) { 672 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); 673 } else if (tap->has_helper) { 674 snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", 675 tap->helper); 676 } else { 677 const char *downscript; 678 679 downscript = tap->has_downscript ? tap->downscript : 680 DEFAULT_NETWORK_DOWN_SCRIPT; 681 682 snprintf(s->nc.info_str, sizeof(s->nc.info_str), 683 "ifname=%s,script=%s,downscript=%s", ifname, script, 684 downscript); 685 686 if (strcmp(downscript, "no") != 0) { 687 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); 688 snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname); 689 } 690 } 691 692 if (tap->has_vhost ? tap->vhost : 693 tap->has_vhostfd || (tap->has_vhostforce && tap->vhostforce)) { 694 int vhostfd; 695 696 if (tap->has_vhostfd) { 697 vhostfd = monitor_handle_fd_param(cur_mon, tap->vhostfd); 698 if (vhostfd == -1) { 699 return -1; 700 } 701 } else { 702 vhostfd = -1; 703 } 704 705 s->vhost_net = vhost_net_init(&s->nc, vhostfd, 706 tap->has_vhostforce && tap->vhostforce); 707 if (!s->vhost_net) { 708 error_report("vhost-net requested but could not be initialized"); 709 return -1; 710 } 711 } else if (tap->has_vhostfd) { 712 error_report("vhostfd= is not valid without vhost"); 713 return -1; 714 } 715 716 return 0; 717 } 718 719 VHostNetState *tap_get_vhost_net(NetClientState *nc) 720 { 721 TAPState *s = DO_UPCAST(TAPState, nc, nc); 722 assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP); 723 return s->vhost_net; 724 } 725